From 8f11f984817ffe48725e35cffa0fdc45492497e9 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell
Date: Mon, 30 Oct 2023 07:37:42 -0400
Subject: [PATCH 001/144] [clang][NFC] Assert not llvm_unreachable (#70149)

An assert is better here.
---
 clang/lib/CodeGen/CGExprScalar.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index c25ddeff9adc3a..7633c6b17db88e 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2084,11 +2084,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     Value *Src = Visit(const_cast<Expr *>(E));
     llvm::Type *SrcTy = Src->getType();
     llvm::Type *DstTy = ConvertType(DestTy);
-    if (SrcTy->isPtrOrPtrVectorTy() && DstTy->isPtrOrPtrVectorTy() &&
-        SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace()) {
-      llvm_unreachable("wrong cast for pointers in different address spaces"
-                       "(must be an address space cast)!");
-    }
+    assert(
+        (!SrcTy->isPtrOrPtrVectorTy() || !DstTy->isPtrOrPtrVectorTy() ||
+         SrcTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace()) &&
+        "Address-space cast must be used to convert address spaces");
 
     if (CGF.SanOpts.has(SanitizerKind::CFIUnrelatedCast)) {
       if (auto *PT = DestTy->getAs<PointerType>()) {

From 97f05956c61e98b62badc6d2449c4e848bf49062 Mon Sep 17 00:00:00 2001
From: tsitdikov <149382295+tsitdikov@users.noreply.github.com>
Date: Mon, 30 Oct 2023 11:39:03 +0000
Subject: [PATCH 002/144] Update mlir-spirv-cpu-runner.cpp (#70649)

https://github.com/llvm/llvm-project/pull/70568 removed the support for
lowering SPIRV to LLVM dialect. We now need to stop using
enableOpaquePointers with ConvertSPIRVToLLVMPassOptions.
---
 mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp
index a427d37ae126ed..e3d5b2ff5843ce 100644
--- a/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp
+++ b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp
@@ -94,8 +94,8 @@ static LogicalResult runMLIRPasses(Operation *module,
   nestedPM.addPass(spirv::createSPIRVUpdateVCEPass());
   passManager.addPass(createLowerHostCodeToLLVMPass(
       enableOpaquePointers(LowerHostCodeToLLVMPassOptions{})));
-  passManager.addPass(createConvertSPIRVToLLVMPass(
-      enableOpaquePointers(ConvertSPIRVToLLVMPassOptions{})));
+  passManager.addPass(
+      createConvertSPIRVToLLVMPass(ConvertSPIRVToLLVMPassOptions{}));
 
   return passManager.run(module);
 }

From 7b2e0095bccb2f63e6b6cc8b8e524d206ebf8d69 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell
Date: Mon, 30 Oct 2023 07:39:45 -0400
Subject: [PATCH 003/144] [clang] Robustify openmp test (#69739)

If the source path contains 'alias' this would spuriously fail.
Be more specific about not wanting global aliases --- clang/test/OpenMP/declare_variant_device_kind_codegen.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/test/OpenMP/declare_variant_device_kind_codegen.cpp b/clang/test/OpenMP/declare_variant_device_kind_codegen.cpp index daa14f1e3a9312..4f9a86f1e0080d 100644 --- a/clang/test/OpenMP/declare_variant_device_kind_codegen.cpp +++ b/clang/test/OpenMP/declare_variant_device_kind_codegen.cpp @@ -80,7 +80,8 @@ // expected-no-diagnostics -// CHECK-NOT: alias +// Verify no unexpected global symbol aliasing +// CHECK-NOT: @{{[^ ]+}} = {{.*}}alias // CHECK-NOT: ret i32 {{1|4|81|84}} // CHECK-DAG: declare {{.*}}i32 @_Z5bazzzv() From 3e96070b2d25954925bf904dc8841067280ffa27 Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Mon, 30 Oct 2023 12:49:16 +0100 Subject: [PATCH 004/144] [MLIR][LLVM] Avoid exporting broken debug intrinsics without a location (#70643) LLVM IR does not allow debug intrinsics without a debug attachment. The location export can fail the export of a location due to multiple reasons. To deal with this, this commit adds a check to the debug intrinsic's LLVM builders, that skips them, if the location is `nullptr`. Fixes #60222 --- .../mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td | 6 ++++ mlir/test/Target/LLVMIR/llvmir-debug.mlir | 31 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td index 040f9895ad0dba..72c932ac07a2e1 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -526,6 +526,9 @@ def LLVM_CoroResumeOp : LLVM_IntrOp<"coro.resume", [], [], [], 0> { class LLVM_DbgIntrOp traits = []> : LLVM_IntrOp { let llvmBuilder = [{ + // Debug intrinsics without debug locations are invalid. + if(!builder.getCurrentDebugLocation()) + return success(); llvm::Module *module = builder.GetInsertBlock()->getModule(); llvm::LLVMContext &ctx = module->getContext(); llvm::Function *fn = @@ -566,6 +569,9 @@ def LLVM_DbgLabelOp : LLVM_IntrOp<"dbg.label", [], [], [], 0> { let summary = "Relates the program to a debug information label."; let arguments = (ins LLVM_DILabelAttr:$label); let llvmBuilder = [{ + // Debug intrinsics without debug locations are invalid. + if(!builder.getCurrentDebugLocation()) + return success(); llvm::Module *module = builder.GetInsertBlock()->getModule(); llvm::LLVMContext &ctx = module->getContext(); llvm::Function *fn = diff --git a/mlir/test/Target/LLVMIR/llvmir-debug.mlir b/mlir/test/Target/LLVMIR/llvmir-debug.mlir index c1e3d723df6675..8d1734d7cdc311 100644 --- a/mlir/test/Target/LLVMIR/llvmir-debug.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-debug.mlir @@ -232,3 +232,34 @@ llvm.func @func_without_subprogram(%0 : i32) { // CHECK: ![[FILE:.*]] = !DIFile(filename: "foo.mlir", directory: "/test/") // CHECK-DAG: ![[FUNC:.*]] = distinct !DISubprogram(name: "func", scope: ![[FILE]] // CHECK-DAG: ![[VAR_LOC]] = !DILocalVariable(name: "a", scope: ![[FUNC]], file: ![[FILE]] + +// ----- + +// Ensures that debug intrinsics without a valid location are not exported to +// avoid broken LLVM IR. 
+ +#di_file = #llvm.di_file<"foo.mlir" in "/test/"> +#di_compile_unit = #llvm.di_compile_unit< + sourceLanguage = DW_LANG_C, file = #di_file, producer = "MLIR", + isOptimized = true, emissionKind = Full +> +#di_subprogram = #llvm.di_subprogram< + compileUnit = #di_compile_unit, scope = #di_file, name = "outer_func", + file = #di_file, subprogramFlags = "Definition|Optimized" +> +#di_local_variable = #llvm.di_local_variable +#declared_var = #llvm.di_local_variable +#di_label = #llvm.di_label + +// CHECK-LABEL: define i32 @dbg_intrinsics_with_no_location( +llvm.func @dbg_intrinsics_with_no_location(%arg0: i32) -> (i32) { + %allocCount = llvm.mlir.constant(1 : i32) : i32 + %alloc = llvm.alloca %allocCount x i64 : (i32) -> !llvm.ptr + // CHECK-NOT: @llvm.dbg.value + llvm.intr.dbg.value #di_local_variable = %arg0 : i32 + // CHECK-NOT: @llvm.dbg.declare + llvm.intr.dbg.declare #declared_var = %alloc : !llvm.ptr + // CHECK-NOT: @llvm.dbg.label + llvm.intr.dbg.label #di_label + llvm.return %arg0 : i32 +} From 46edbce454a58e35b6f026092fe4dadcf5fccb2e Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Mon, 30 Oct 2023 12:50:37 +0100 Subject: [PATCH 005/144] [MLIR][LLVM] Change CAPI pointer factory to create opaque pointers (#70572) This commit changes the LLVM dialect's CAPI pointer getters to drop support for typed pointers. Typed pointers are deprecated and should no longer be generated. --- mlir/include/mlir-c/Dialect/LLVM.h | 2 +- mlir/lib/CAPI/Dialect/LLVM.cpp | 4 ++-- mlir/test/CAPI/llvm.c | 25 ++++++++++++++----------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h index ba98c33fdfd6bc..72701a82225436 100644 --- a/mlir/include/mlir-c/Dialect/LLVM.h +++ b/mlir/include/mlir-c/Dialect/LLVM.h @@ -19,7 +19,7 @@ extern "C" { MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(LLVM, llvm); /// Creates an llvm.ptr type. -MLIR_CAPI_EXPORTED MlirType mlirLLVMPointerTypeGet(MlirType pointee, +MLIR_CAPI_EXPORTED MlirType mlirLLVMPointerTypeGet(MlirContext ctx, unsigned addressSpace); /// Creates an llmv.void type. 
diff --git a/mlir/lib/CAPI/Dialect/LLVM.cpp b/mlir/lib/CAPI/Dialect/LLVM.cpp
index d023bf5d68ce5a..b4405f7aac8ab2 100644
--- a/mlir/lib/CAPI/Dialect/LLVM.cpp
+++ b/mlir/lib/CAPI/Dialect/LLVM.cpp
@@ -16,8 +16,8 @@ using namespace mlir::LLVM;
 
 MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(LLVM, llvm, LLVMDialect)
 
-MlirType mlirLLVMPointerTypeGet(MlirType pointee, unsigned addressSpace) {
-  return wrap(LLVMPointerType::get(unwrap(pointee), addressSpace));
+MlirType mlirLLVMPointerTypeGet(MlirContext ctx, unsigned addressSpace) {
+  return wrap(LLVMPointerType::get(unwrap(ctx), addressSpace));
 }
 
 MlirType mlirLLVMVoidTypeGet(MlirContext ctx) {
diff --git a/mlir/test/CAPI/llvm.c b/mlir/test/CAPI/llvm.c
index 82e1660c15a482..aaec7b113f0a97 100644
--- a/mlir/test/CAPI/llvm.c
+++ b/mlir/test/CAPI/llvm.c
@@ -10,8 +10,8 @@
 // RUN: mlir-capi-llvm-test 2>&1 | FileCheck %s
 
 #include "mlir-c/Dialect/LLVM.h"
-#include "mlir-c/IR.h"
 #include "mlir-c/BuiltinTypes.h"
+#include "mlir-c/IR.h"
 
 #include <assert.h>
 #include <math.h>
@@ -26,17 +26,20 @@ static void testTypeCreation(MlirContext ctx) {
   MlirType i32 = mlirIntegerTypeGet(ctx, 32);
   MlirType i64 = mlirIntegerTypeGet(ctx, 64);
 
-  const char *i32p_text = "!llvm.ptr<i32>";
-  MlirType i32p = mlirLLVMPointerTypeGet(i32, 0);
-  MlirType i32p_ref = mlirTypeParseGet(ctx, mlirStringRefCreateFromCString(i32p_text));
-  // CHECK: !llvm.ptr<i32>: 1
-  fprintf(stderr, "%s: %d\n", i32p_text, mlirTypeEqual(i32p, i32p_ref));
+  const char *ptr_text = "!llvm.ptr";
+  MlirType ptr = mlirLLVMPointerTypeGet(ctx, 0);
+  MlirType ptr_ref =
+      mlirTypeParseGet(ctx, mlirStringRefCreateFromCString(ptr_text));
+  // CHECK: !llvm.ptr: 1
+  fprintf(stderr, "%s: %d\n", ptr_text, mlirTypeEqual(ptr, ptr_ref));
 
-  const char *i32p4_text = "!llvm.ptr<i32, 4>";
-  MlirType i32p4 = mlirLLVMPointerTypeGet(i32, 4);
-  MlirType i32p4_ref = mlirTypeParseGet(ctx, mlirStringRefCreateFromCString(i32p4_text));
-  // CHECK: !llvm.ptr<i32, 4>: 1
-  fprintf(stderr, "%s: %d\n", i32p4_text, mlirTypeEqual(i32p4, i32p4_ref));
+  const char *ptr_addr_text = "!llvm.ptr<42>";
+  MlirType ptr_addr = mlirLLVMPointerTypeGet(ctx, 42);
+  MlirType ptr_addr_ref =
+      mlirTypeParseGet(ctx, mlirStringRefCreateFromCString(ptr_addr_text));
+  // CHECK: !llvm.ptr<42>: 1
+  fprintf(stderr, "%s: %d\n", ptr_addr_text,
+          mlirTypeEqual(ptr_addr, ptr_addr_ref));
 
   const char *voidt_text = "!llvm.void";
   MlirType voidt = mlirLLVMVoidTypeGet(ctx);

From 8483d18be5b6b5e8721a10eb558be06008307ec6 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache
Date: Wed, 25 Oct 2023 13:43:25 +0000
Subject: [PATCH 006/144] [mlir][Transform] Relax the applicability of
 transform.foreach_match to also take into account the op itself

---
 .../mlir/Dialect/Transform/IR/TransformOps.td | 24 +++++++++++++++----
 .../lib/Dialect/Transform/IR/TransformOps.cpp | 12 ++++++----
 .../Dialect/Linalg/match-ops-interpreter.mlir |  3 ++-
 3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td
index b14c89eadb097d..2fd0e80db96feb 100644
--- a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td
+++ b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td
@@ -481,8 +481,16 @@ def ForeachMatchOp : TransformDialectOp<"foreach_match", [
    This operation consumes the operand and produces a new handle associated
    with the same payload. This is necessary to trigger invalidation of handles
    to any of the payload operations nested in the payload operations associated
-    with the operand, as those are likely to be modified by actions.
Note that - the root payload operation associated with the operand are not matched. + with the operand, as those are likely to be modified by actions. + + By default, the root payload operation associated with the operand is not + matched. This is to support the conservative case where applied actions may + invalidate the root payload operation. If the optional `restrict_root` + attribute is set, the root operand is guaranteed to not be invalidated by any + of the applied actions. In such cases, the root payload operation is also + matched. This is useful because matching the root payload operation is a + common idiom, when e.g. matching a func.func directly and operations nested + under it. The operation succeeds if none of the matchers produced a definite failure during application and if all of the applied actions produced success. Note @@ -495,13 +503,19 @@ def ForeachMatchOp : TransformDialectOp<"foreach_match", [ }]; let arguments = (ins TransformHandleTypeInterface:$root, + UnitAttr:$restrict_root, SymbolRefArrayAttr:$matchers, SymbolRefArrayAttr:$actions); let results = (outs TransformHandleTypeInterface:$updated); - let assemblyFormat = - "`in` $root custom($matchers, $actions) " - "attr-dict `:` functional-type($root, $updated)"; + let assemblyFormat = [{ + (`restrict_root` $restrict_root^)? + `in` + $root + custom($matchers, $actions) + attr-dict + `:` functional-type($root, $updated) + }]; let hasVerifier = 1; } diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index 8db77b6059dd2e..514a75b5d59046 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -850,8 +850,9 @@ transform::ForeachMatchOp::apply(transform::TransformRewriter &rewriter, for (Operation *root : state.getPayloadOps(getRoot())) { WalkResult walkResult = root->walk([&](Operation *op) { - // Skip over the root op itself so we don't invalidate it. - if (op == root) + // If getRestrictRoot is not present, skip over the root op itself so we + // don't invalidate it. 
+ if (!getRestrictRoot() && op == root) return WalkResult::advance(); DEBUG_MATCHER({ @@ -1556,10 +1557,10 @@ DiagnosedSilenceableFailure transform::MatchOperationEmptyOp::matchOperation( ::std::optional<::mlir::Operation *> maybeCurrent, transform::TransformResults &results, transform::TransformState &state) { if (!maybeCurrent.has_value()) { - DBGS_MATCHER() << "MatchOperationEmptyOp success\n"; + DEBUG_MATCHER({ DBGS_MATCHER() << "MatchOperationEmptyOp success\n"; }); return DiagnosedSilenceableFailure::success(); } - DBGS_MATCHER() << "MatchOperationEmptyOp failure\n"; + DEBUG_MATCHER({ DBGS_MATCHER() << "MatchOperationEmptyOp failure\n"; }); return emitSilenceableError() << "operation is not empty"; } @@ -1961,7 +1962,8 @@ void transform::NamedSequenceOp::build(OpBuilder &builder, state.addAttribute(SymbolTable::getSymbolAttrName(), builder.getStringAttr(symName)); state.addAttribute(getFunctionTypeAttrName(state.name), - TypeAttr::get(FunctionType::get(builder.getContext(), rootType, resultTypes))); + TypeAttr::get(FunctionType::get(builder.getContext(), + rootType, resultTypes))); state.attributes.append(attrs.begin(), attrs.end()); state.addRegion(); diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir index 9489aadac843d7..c88945c8a5c60f 100644 --- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir +++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir @@ -100,12 +100,13 @@ module attributes { transform.with_named_sequence } { } transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.consumed}) { - transform.foreach_match in %arg0 + transform.foreach_match restrict_root in %arg0 @match_structured_suppress -> @do_nothing : (!transform.any_op) -> !transform.any_op transform.yield } + // expected-remark @below {{other}} func.func @payload() attributes { transform.target_tag = "start_here" } { // expected-remark @below {{other}} %D = arith.constant dense<1.0> : tensor<2x4xf32> From b2929bebb6ce8d75acab4f2fde43213673cb6010 Mon Sep 17 00:00:00 2001 From: Tulio Magno Quites Machado Filho Date: Mon, 30 Oct 2023 08:55:34 -0300 Subject: [PATCH 007/144] [lldb] Adapt code to Python 3.13 (#70445) 1. Remove usage of PyEval_ThreadsInitialized and PyEval_InitThreads Both of these functions were removed in Python 3.13 [1] after being deprecated since Python 3.9. According to "What's new in Python 3.13" document [1]: Since Python 3.7, Py_Initialize() always creates the GIL: calling PyEval_InitThreads() did nothing and PyEval_ThreadsInitialized() always returned non-zero. 2. Replace _Py_IsFinalizing() with Py_IsFinalizing(). 
[1] https://docs.python.org/3.13/whatsnew/3.13.html
---
 .../ScriptInterpreter/Python/PythonDataObjects.cpp |  4 +++-
 .../Python/ScriptInterpreterPython.cpp             |  9 +++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
index 9ac840a4a102da..fe3438c4247154 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
@@ -71,7 +71,9 @@ Expected<long long> python::As<long long>(Expected<PythonObject> &&obj) {
 }
 
 static bool python_is_finalizing() {
-#if PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 7
+#if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 13) || (PY_MAJOR_VERSION > 3)
+  return Py_IsFinalizing();
+#elif PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 7
   return _Py_Finalizing != nullptr;
 #else
   return _Py_IsFinalizing();
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
index a57c8e4984ad8a..968cc8ca03001e 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
@@ -179,18 +179,27 @@ struct InitializePythonRAII {
       return;
 #endif
 
+// `PyEval_ThreadsInitialized` was deprecated in Python 3.9 and removed in
+// Python 3.13. It has been returning `true` always since Python 3.7.
+#if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 9) || (PY_MAJOR_VERSION < 3)
     if (PyEval_ThreadsInitialized()) {
+#endif
       Log *log = GetLog(LLDBLog::Script);
 
      m_was_already_initialized = true;
      m_gil_state = PyGILState_Ensure();
      LLDB_LOGV(log, "Ensured PyGILState. Previous state = {0}locked\n",
                m_gil_state == PyGILState_UNLOCKED ? "un" : "");
+
+// `PyEval_InitThreads` was deprecated in Python 3.9 and removed in
+// Python 3.13.
+#if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 9) || (PY_MAJOR_VERSION < 3)
      return;
    }
 
    // InitThreads acquires the GIL if it hasn't been called before.
    PyEval_InitThreads();
+#endif
   }
 
   PyGILState_STATE m_gil_state = PyGILState_UNLOCKED;

From f6643263631bcb0d191ef923963ac1a5ca9ac5fd Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Mon, 30 Oct 2023 12:57:03 +0100
Subject: [PATCH 008/144] Remove the opaque pointers flag from tools

This has been the default for a while and the flags are slowly going
away. NFCI.
---
 mlir/test/lib/Dialect/LLVM/TestLowerToLLVM.cpp | 14 +++-----------
 .../mlir-spirv-cpu-runner.cpp                  | 11 ++---------
 .../mlir-vulkan-runner/mlir-vulkan-runner.cpp  | 16 ++++------------
 3 files changed, 9 insertions(+), 32 deletions(-)

diff --git a/mlir/test/lib/Dialect/LLVM/TestLowerToLLVM.cpp b/mlir/test/lib/Dialect/LLVM/TestLowerToLLVM.cpp
index 8d61ec44214f88..10c21612f64ac6 100644
--- a/mlir/test/lib/Dialect/LLVM/TestLowerToLLVM.cpp
+++ b/mlir/test/lib/Dialect/LLVM/TestLowerToLLVM.cpp
@@ -48,11 +48,6 @@ void buildTestLowerToLLVM(OpPassManager &pm,
   // unrealized casts, but there needs to be the final module-wise cleanup in
   // the end. Keep module-level for now.
 
-  auto enableOpaquePointers = [](auto options) {
-    options.useOpaquePointers = true;
-    return options;
-  };
-
   // Blanket-convert any remaining high-level vector ops to loops if any remain.
   pm.addNestedPass<func::FuncOp>(createConvertVectorToSCFPass());
   // Blanket-convert any remaining linalg ops to loops if any remain.
@@ -67,8 +62,7 @@ void buildTestLowerToLLVM(OpPassManager &pm, // Convert vector to LLVM (always needed). pm.addPass(createConvertVectorToLLVMPass( // TODO: add more options on a per-need basis. - enableOpaquePointers( - ConvertVectorToLLVMPassOptions{options.reassociateFPReductions}))); + ConvertVectorToLLVMPassOptions{options.reassociateFPReductions})); // Convert Math to LLVM (always needed). pm.addNestedPass(createConvertMathToLLVMPass()); // Expand complicated MemRef operations before lowering them. @@ -76,11 +70,9 @@ void buildTestLowerToLLVM(OpPassManager &pm, // The expansion may create affine expressions. Get rid of them. pm.addPass(createLowerAffinePass()); // Convert MemRef to LLVM (always needed). - pm.addPass(createFinalizeMemRefToLLVMConversionPass( - enableOpaquePointers(FinalizeMemRefToLLVMConversionPassOptions{}))); + pm.addPass(createFinalizeMemRefToLLVMConversionPass()); // Convert Func to LLVM (always needed). - pm.addPass(createConvertFuncToLLVMPass( - enableOpaquePointers(ConvertFuncToLLVMPassOptions{}))); + pm.addPass(createConvertFuncToLLVMPass()); // Convert Index to LLVM (always needed). pm.addPass(createConvertIndexToLLVMPass()); // Convert remaining unrealized_casts (always needed). diff --git a/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp index e3d5b2ff5843ce..7e0b51cac80621 100644 --- a/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp +++ b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp @@ -84,18 +84,11 @@ static LogicalResult runMLIRPasses(Operation *module, passManager.addPass(createGpuKernelOutliningPass()); passManager.addPass(createConvertGPUToSPIRVPass(/*mapMemorySpace=*/true)); - auto enableOpaquePointers = [](auto options) { - options.useOpaquePointers = true; - return options; - }; - OpPassManager &nestedPM = passManager.nest(); nestedPM.addPass(spirv::createSPIRVLowerABIAttributesPass()); nestedPM.addPass(spirv::createSPIRVUpdateVCEPass()); - passManager.addPass(createLowerHostCodeToLLVMPass( - enableOpaquePointers(LowerHostCodeToLLVMPassOptions{}))); - passManager.addPass( - createConvertSPIRVToLLVMPass(ConvertSPIRVToLLVMPassOptions{})); + passManager.addPass(createLowerHostCodeToLLVMPass()); + passManager.addPass(createConvertSPIRVToLLVMPass()); return passManager.run(module); } diff --git a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp index 0588fcd265f3f7..d3ec890bf48590 100644 --- a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp +++ b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp @@ -70,25 +70,17 @@ static LogicalResult runMLIRPasses(Operation *op, if (options.spirvWebGPUPrepare) modulePM.addPass(spirv::createSPIRVWebGPUPreparePass()); - auto enableOpaquePointers = [](auto passOption) { - passOption.useOpaquePointers = true; - return passOption; - }; - passManager.addPass(createConvertGpuLaunchFuncToVulkanLaunchFuncPass()); - passManager.addPass(createFinalizeMemRefToLLVMConversionPass( - enableOpaquePointers(FinalizeMemRefToLLVMConversionPassOptions{}))); - passManager.addPass(createConvertVectorToLLVMPass( - enableOpaquePointers(ConvertVectorToLLVMPassOptions{}))); + passManager.addPass(createFinalizeMemRefToLLVMConversionPass()); + passManager.addPass(createConvertVectorToLLVMPass()); passManager.nest().addPass(LLVM::createRequestCWrappersPass()); ConvertFuncToLLVMPassOptions funcToLLVMOptions{}; funcToLLVMOptions.indexBitwidth = 
DataLayout(module).getTypeSizeInBits(IndexType::get(module.getContext())); passManager.addPass( - createConvertFuncToLLVMPass(enableOpaquePointers(funcToLLVMOptions))); + createConvertFuncToLLVMPass(funcToLLVMOptions)); passManager.addPass(createReconcileUnrealizedCastsPass()); - passManager.addPass(createConvertVulkanLaunchFuncToVulkanCallsPass( - enableOpaquePointers(ConvertVulkanLaunchFuncToVulkanCallsPassOptions{}))); + passManager.addPass(createConvertVulkanLaunchFuncToVulkanCallsPass()); return passManager.run(module); } From a1b2ace137385388bf9bd7ea4b6df3ff298900f6 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Mon, 30 Oct 2023 12:12:52 +0000 Subject: [PATCH 009/144] [mlir][ArmSME] Add optional padding and mask operands to tile_load (#69195) Padding and mask are optional, but if one is specified both must be specified. This is consistent with vector.transfer_read. --- .../mlir/Dialect/ArmSME/IR/ArmSMEOps.td | 52 ++++++++++++++++-- mlir/test/Dialect/ArmSME/invalid.mlir | 53 +++++++++++++++++++ mlir/test/Dialect/ArmSME/roundtrip.mlir | 10 ++++ 3 files changed, 112 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td index 9b9dbff10ea2da..b30d0fdb866bd2 100644 --- a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td +++ b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td @@ -231,7 +231,26 @@ def ZeroOp : ArmSME_Op<"zero", [Pure]> { let assemblyFormat = "attr-dict `:` type($res)"; } -def TileLoadOp : ArmSME_Op<"tile_load"> { +def TileLoadOp : ArmSME_Op<"tile_load", [ + AttrSizedOperandSegments, + OptionalTypesMatchWith< + "padding type matches element type of result", + "result", "padding", + "::llvm::cast($_self).getElementType()" + >, + OptionalTypesMatchWith< + "mask has i1 element type and same shape as result", + "result", "mask", + "VectorType(" + "VectorType::Builder(" + "::llvm::cast($_self)" + ").setElementType(IntegerType::get($_self.getContext(), 1)))" + >, + PredOpTrait< + "both `padding` and `mask` should be provided or neither", + CPred<"bool(getPadding()) == bool(getMask())"> + >, +]> { let summary = "Tile load operation"; let description = [{ Loads a 2D SME "virtual tile" from memory defined by a base and indices, @@ -242,6 +261,16 @@ def TileLoadOp : ArmSME_Op<"tile_load"> { dimensions, since the operation is scalable, and the element type must be a scalar that matches the element type of the result. + An optional SSA value `padding` of the same elemental type as the MemRef is + provided to specify a fallback value in the case of masking. + + An optional SSA value `mask` may be specified to mask out elements read + from the MemRef. The `mask` type is an `i1` vector with a shape that + matches how elements are read from the MemRef. Elements whose corresponding + mask element is `0` are masked out and replaced with `padding`. + + If either `padding` or `mask` are specified, both must be specified. + Example 1: Load an 8-bit element ZA tile with horizontal layout (default) from memory (ZA0.B). ```mlir %tile = arm_sme.tile_load %base[%c0, %c0] : memref, vector<[16]x[16]xi8> @@ -256,10 +285,16 @@ def TileLoadOp : ArmSME_Op<"tile_load"> { ```mlir %tile = arm_sme.tile_load %base[%c0, %c0] layout : memref, vector<[1]x[1]xi128> ``` + + Example 4: Masked load of int 32-bit element ZA tile with horizontal layout (default) from memory. 
+ ```mlir + %tile = arm_sme.tile_load %base[%c0, %c0], %pad, %mask : memref, vector<[4]x[4]xf32> + ``` }]; let arguments = (ins Arg:$base, Variadic:$indices, + Optional:$padding, Optional:$mask, ArmSME_TileSliceLayoutAttr:$layout ); let results = (outs SMETile:$result); @@ -273,9 +308,20 @@ def TileLoadOp : ArmSME_Op<"tile_load"> { } }]; + let builders = [ + OpBuilder<(ins "VectorType":$resultType, "Value":$base, + "ValueRange":$indices, "TileSliceLayout":$layout), [{ + build($_builder, $_state, resultType, base, indices, {}, {}, layout); + }]>, + OpBuilder<(ins "VectorType":$resultType, "Value":$base, + "ValueRange":$indices), [{ + build($_builder, $_state, resultType, base, indices, {}, {}, {}); + }]>, + ]; + let assemblyFormat = - "$base `[` $indices `]` (`layout` `` $layout^)? attr-dict " - "`:` type($base) `,` type($result)"; + "$base `[` $indices `]` (`,` $padding `,` $mask^)? (`layout` `` $layout^)?" + "attr-dict `:` type($base) `,` type($result)"; } def TileStoreOp : ArmSME_Op<"tile_store"> { diff --git a/mlir/test/Dialect/ArmSME/invalid.mlir b/mlir/test/Dialect/ArmSME/invalid.mlir index 431009b1b9ede2..25c62f78d84354 100644 --- a/mlir/test/Dialect/ArmSME/invalid.mlir +++ b/mlir/test/Dialect/ArmSME/invalid.mlir @@ -1,5 +1,9 @@ // RUN: mlir-opt %s -split-input-file -verify-diagnostics +//===----------------------------------------------------------------------===// +// arm_sme.cast_tile_to_vector +//===----------------------------------------------------------------------===// + // ----- func.func @arm_sme_cast_tile_to_vector__bad_tile_id_bitwidth(%tile_id : i8) -> vector<[8]x[8]xi16> { @@ -48,6 +52,10 @@ func.func @arm_sme_cast_tile_to_vector_bad_shape(%tile_id : i8) -> vector<[4]x[1 return %0 : vector<[4]x[16]xi8> } +//===----------------------------------------------------------------------===// +// arm_sme.cast_vector_to_tile +//===----------------------------------------------------------------------===// + // ----- func.func @arm_sme_cast_vector_to_tile__bad_tile_id_bitwidth(%vector : vector<[1]x[1]xi128>) -> i32 { @@ -64,6 +72,10 @@ func.func @arm_sme_cast_vector_to_tile__bad_rank_1d(%vector : vector<[16]xi8>) - return %0 : i8 } +//===----------------------------------------------------------------------===// +// arm_sme.get_tile_id +//===----------------------------------------------------------------------===// + // ----- func.func @arm_sme_get_tile_id__bad_type() -> i1 { @@ -72,6 +84,10 @@ func.func @arm_sme_get_tile_id__bad_type() -> i1 { return %0 : i1 } +//===----------------------------------------------------------------------===// +// arm_sme.move_vector_to_tile_slice +//===----------------------------------------------------------------------===// + // ----- func.func @arm_sme_move_vector_to_tile_slice_i8__bad_vector_type(%vector : vector<[8]xi8>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> vector<[16]x[16]xi8> { @@ -90,6 +106,10 @@ func.func @arm_sme_move_vector_to_tile_slice_f32__bad_vector_type(%vector : vect return %0 : vector<[4]x[4]xf32> } +//===----------------------------------------------------------------------===// +// arm_sme.move_tile_slice_to_vector +//===----------------------------------------------------------------------===// + // ----- func.func @arm_sme_move_tile_slice_to_vector__bad_result_type(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index) -> vector<[2]xf64> { @@ -97,3 +117,36 @@ func.func @arm_sme_move_tile_slice_to_vector__bad_result_type(%tile : vector<[4] %0 = arm_sme.move_tile_slice_to_vector 
%tile[%tile_slice_index] : vector<[2]xf64> from vector<[4]x[4]xf32> return %0 : vector<[2]xf64> } + +//===----------------------------------------------------------------------===// +// arm_sme.tile_load +//===----------------------------------------------------------------------===// + +// ----- + +func.func @arm_sme_tile_load__bad_padding_type(%src : memref, %pad : f32, %mask : vector<[2]x[2]xi1>) { + %c0 = arith.constant 0 : index + // expected-note@-2 {{prior use here}} + // expected-error@+1 {{use of value '%pad' expects different type than prior uses: 'f64' vs 'f32'}} + %tile = arm_sme.tile_load %src[%c0, %c0], %pad, %mask : memref, vector<[2]x[2]xf64> + return +} + +// ----- + +func.func @arm_sme_tile_load__bad_mask_type(%src : memref, %pad : f64, %mask : vector<[4]x[4]xi1>) { + %c0 = arith.constant 0 : index + // expected-note@-2 {{prior use here}} + // expected-error@+1 {{use of value '%mask' expects different type than prior uses: 'vector<[2]x[2]xi1>' vs 'vector<[4]x[4]xi1>}} + %tile = arm_sme.tile_load %src[%c0, %c0], %pad, %mask : memref, vector<[2]x[2]xf64> + return +} + +// ----- + +func.func @arm_sme_tile_load__pad_but_no_mask(%src : memref, %pad : f64) { + %c0 = arith.constant 0 : index + // expected-error@+1 {{op failed to verify that both `padding` and `mask` should be provided or neither}} + %tile = arm_sme.tile_load %src[%c0, %c0], %pad, : memref, vector<[2]x[2]xf64> + return +} diff --git a/mlir/test/Dialect/ArmSME/roundtrip.mlir b/mlir/test/Dialect/ArmSME/roundtrip.mlir index e5ba81eff83602..6866137267dc66 100644 --- a/mlir/test/Dialect/ArmSME/roundtrip.mlir +++ b/mlir/test/Dialect/ArmSME/roundtrip.mlir @@ -438,6 +438,16 @@ func.func @arm_sme_tile_load_ver_f64(%src : memref) { // ----- +/// Padding and mask are optional +func.func @arm_sme_tile_load_hor_pad_f64(%src : memref, %pad : f64, %mask : vector<[2]x[2]xi1>) { + // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}], {{.*}}, {{.*}} : memref, vector<[2]x[2]xf64> + %c0 = arith.constant 0 : index + %tile = arm_sme.tile_load %src[%c0, %c0], %pad, %mask : memref, vector<[2]x[2]xf64> + return +} + +// ----- + /// Layout is optional and horizontal is the default, verify it's still parsed. func.func @arm_sme_tile_load_explicit_hor(%src : memref) { // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref, vector<[16]x[16]xi8> From da28c3309413475146fdeb9194e44b7a63b7484e Mon Sep 17 00:00:00 2001 From: "Henrik G. Olsson" Date: Mon, 30 Oct 2023 13:17:26 +0100 Subject: [PATCH 010/144] [UTC] Recognise CHECK lines with globals matched literally (#70050) Previously when using `-p` a.k.a. `--preserve-names` existing lines for checking globals were not recognised as such, leading to the line being kept while also being emitted again, resulting in duplicated CHECK lines. This resolves #70048. 
--- .../Inputs/global_preserve_name.ll | 13 +++++++++++++ .../Inputs/global_preserve_name.ll.expected | 14 ++++++++++++++ .../update_test_checks/global_preserve_name.test | 7 +++++++ llvm/utils/UpdateTestChecks/common.py | 2 +- 4 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_preserve_name.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_preserve_name.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_checks/global_preserve_name.test diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_preserve_name.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_preserve_name.ll new file mode 100644 index 00000000000000..b872e9d53e2cba --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_preserve_name.ll @@ -0,0 +1,13 @@ +; RUN: opt -S < %s | FileCheck %s + +@G = constant i32 42 + +;. +; CHECK: @G = constant i32 42 +;. +define ptr @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: ret ptr @G +; + ret ptr @G +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_preserve_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_preserve_name.ll.expected new file mode 100644 index 00000000000000..f29ed24be3fec7 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_preserve_name.ll.expected @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --check-globals +; RUN: opt -S < %s | FileCheck %s + +@G = constant i32 42 + +;. +; CHECK: @G = constant i32 42 +;. +define ptr @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: ret ptr @G +; + ret ptr @G +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/global_preserve_name.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/global_preserve_name.test new file mode 100644 index 00000000000000..2ef050abe15b9a --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/global_preserve_name.test @@ -0,0 +1,7 @@ +## Basic test checking that we capture existing lines matching global variable names +# RUN: cp -f %S/Inputs/global_preserve_name.ll %t.ll && %update_test_checks %t.ll --check-globals --preserve-names +# RUN: diff -u %t.ll %S/Inputs/global_preserve_name.ll.expected +## Verify that running without the --global-value-regex flag respects UTC_ARGS, and that the output is a fixed point. +# RUN: %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/global_preserve_name.ll.expected + diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index c8c8d85e0dc68c..33da7b3b8665dd 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -391,7 +391,7 @@ def should_add_line_to_output( m = CHECK_RE.match(input_line) if m and m.group(1) in prefix_set: if skip_global_checks: - global_ir_value_re = re.compile(r"\[\[", flags=(re.M)) + global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M)) return not global_ir_value_re.search(input_line) return False From 526c9b7e37fa12abc17eebc68f21c1d213477ba8 Mon Sep 17 00:00:00 2001 From: martinboehme Date: Mon, 30 Oct 2023 13:18:57 +0100 Subject: [PATCH 011/144] [clang][nullability] Use `proves()` and `assume()` instead of deprecated synonyms. 
 (#70297)
---
 .../lib/Analysis/FlowSensitive/HTMLLogger.cpp |   9 +-
 .../Models/ChromiumCheckModel.cpp             |   2 +-
 .../Models/UncheckedOptionalAccessModel.cpp   |  22 ++-
 .../TypeErasedDataflowAnalysis.cpp            |   2 +-
 .../FlowSensitive/ChromiumCheckModelTest.cpp  |   4 +-
 .../FlowSensitive/SignAnalysisTest.cpp        |  45 +++---
 .../Analysis/FlowSensitive/TransferTest.cpp   | 152 +++++++++---------
 .../TypeErasedDataflowAnalysisTest.cpp        |  74 +++++----
 8 files changed, 149 insertions(+), 161 deletions(-)

diff --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
index 8aef1d6f46089d..8329367098b1db 100644
--- a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
+++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
@@ -114,11 +114,10 @@ class ModelDumper {
     // guaranteed true/false here is valuable and hard to determine by hand.
     if (auto *B = llvm::dyn_cast<BoolValue>(&V)) {
       JOS.attribute("formula", llvm::to_string(B->formula()));
-      JOS.attribute(
-          "truth", Env.flowConditionImplies(B->formula()) ? "true"
-          : Env.flowConditionImplies(Env.arena().makeNot(B->formula()))
-              ? "false"
-              : "unknown");
+      JOS.attribute("truth", Env.proves(B->formula()) ? "true"
+                             : Env.proves(Env.arena().makeNot(B->formula()))
+                                 ? "false"
+                                 : "unknown");
     }
   }
   void dump(const StorageLocation &L) {
diff --git a/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp
index 895f4ff04a172f..f49087ababc44c 100644
--- a/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp
@@ -59,7 +59,7 @@ bool ChromiumCheckModel::transfer(const CFGElement &Element, Environment &Env) {
   if (const auto *M = dyn_cast<CXXMethodDecl>(Call->getDirectCallee())) {
     if (isCheckLikeMethod(CheckDecls, *M)) {
       // Mark this branch as unreachable.
-      Env.addToFlowCondition(Env.arena().makeLiteral(false));
+      Env.assume(Env.arena().makeLiteral(false));
       return true;
     }
   }
diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
index 8bd9a030f50cda..55d0713639d90d 100644
--- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
@@ -413,7 +413,7 @@ bool isEmptyOptional(const Value &OptionalVal, const Environment &Env) {
   auto *HasValueVal =
       cast_or_null<BoolValue>(OptionalVal.getProperty("has_value"));
   return HasValueVal != nullptr &&
-         Env.flowConditionImplies(Env.arena().makeNot(HasValueVal->formula()));
+         Env.proves(Env.arena().makeNot(HasValueVal->formula()));
 }
 
 /// Returns true if and only if `OptionalVal` is initialized and known to be
@@ -421,8 +421,7 @@ bool isEmptyOptional(const Value &OptionalVal, const Environment &Env) {
 bool isNonEmptyOptional(const Value &OptionalVal, const Environment &Env) {
   auto *HasValueVal =
       cast_or_null<BoolValue>(OptionalVal.getProperty("has_value"));
-  return HasValueVal != nullptr &&
-         Env.flowConditionImplies(HasValueVal->formula());
+  return HasValueVal != nullptr && Env.proves(HasValueVal->formula());
 }
 
 Value *getValueBehindPossiblePointer(const Expr &E, const Environment &Env) {
@@ -490,8 +489,8 @@ void transferValueOrImpl(
   if (HasValueVal == nullptr)
     return;
 
-  Env.addToFlowCondition(ModelPred(Env, forceBoolValue(Env, *ValueOrPredExpr),
-                                   HasValueVal->formula()));
+  Env.assume(ModelPred(Env, forceBoolValue(Env, *ValueOrPredExpr),
+                       HasValueVal->formula()));
 }
 
 void transferValueOrStringEmptyCall(const clang::Expr *ComparisonExpr,
@@ -717,8 +716,8 @@ void transferOptionalAndOptionalCmp(const clang::CXXOperatorCallExpr *CmpExpr,
     if (auto *RHasVal = getHasValue(Env, Env.getValue(*CmpExpr->getArg(1)))) {
       if (CmpExpr->getOperator() == clang::OO_ExclaimEqual)
         CmpValue = &A.makeNot(*CmpValue);
-      Env.addToFlowCondition(evaluateEquality(A, *CmpValue, LHasVal->formula(),
-                                              RHasVal->formula()));
+      Env.assume(evaluateEquality(A, *CmpValue, LHasVal->formula(),
+                                  RHasVal->formula()));
     }
 }
 
@@ -729,7 +728,7 @@ void transferOptionalAndValueCmp(const clang::CXXOperatorCallExpr *CmpExpr,
   if (auto *HasVal = getHasValue(Env, Env.getValue(*E))) {
     if (CmpExpr->getOperator() == clang::OO_ExclaimEqual)
       CmpValue = &A.makeNot(*CmpValue);
-    Env.addToFlowCondition(
+    Env.assume(
         evaluateEquality(A, *CmpValue, HasVal->formula(), A.makeLiteral(true)));
   }
 }
@@ -917,7 +916,7 @@ llvm::SmallVector<SourceLocation> diagnoseUnwrapCall(const Expr *ObjectExpr,
   if (auto *OptionalVal = getValueBehindPossiblePointer(*ObjectExpr, Env)) {
     auto *Prop = OptionalVal->getProperty("has_value");
     if (auto *HasValueVal = cast_or_null<BoolValue>(Prop)) {
-      if (Env.flowConditionImplies(HasValueVal->formula()))
+      if (Env.proves(HasValueVal->formula()))
         return {};
     }
   }
@@ -1004,14 +1003,13 @@ bool UncheckedOptionalAccessModel::merge(QualType Type, const Value &Val1,
   bool MustNonEmpty1 = isNonEmptyOptional(Val1, Env1);
   bool MustNonEmpty2 = isNonEmptyOptional(Val2, Env2);
   if (MustNonEmpty1 && MustNonEmpty2)
-    MergedEnv.addToFlowCondition(HasValueVal.formula());
+    MergedEnv.assume(HasValueVal.formula());
   else if (
       // Only make the costly calls to `isEmptyOptional` if we got "unknown"
      // (false) for both calls to `isNonEmptyOptional`.
!MustNonEmpty1 && !MustNonEmpty2 && isEmptyOptional(Val1, Env1) && isEmptyOptional(Val2, Env2)) - MergedEnv.addToFlowCondition( - MergedEnv.arena().makeNot(HasValueVal.formula())); + MergedEnv.assume(MergedEnv.arena().makeNot(HasValueVal.formula())); setHasValue(MergedVal, HasValueVal); return true; } diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index 23b062665a687c..e54fb2a01ddeea 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -148,7 +148,7 @@ class TerminatorVisitor ConditionValue = false; } - Env.addToFlowCondition(Val->formula()); + Env.assume(Val->formula()); return {&Cond, ConditionValue}; } diff --git a/clang/unittests/Analysis/FlowSensitive/ChromiumCheckModelTest.cpp b/clang/unittests/Analysis/FlowSensitive/ChromiumCheckModelTest.cpp index 1cb51a9cf37c5c..a2762046665a2c 100644 --- a/clang/unittests/Analysis/FlowSensitive/ChromiumCheckModelTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/ChromiumCheckModelTest.cpp @@ -159,7 +159,7 @@ TEST(ChromiumCheckModelTest, CheckSuccessImpliesConditionHolds) { auto *FooVal = cast(Env.getValue(*FooDecl)); - EXPECT_TRUE(Env.flowConditionImplies(FooVal->formula())); + EXPECT_TRUE(Env.proves(FooVal->formula())); }; std::string Code = R"( @@ -190,7 +190,7 @@ TEST(ChromiumCheckModelTest, UnrelatedCheckIgnored) { auto *FooVal = cast(Env.getValue(*FooDecl)); - EXPECT_FALSE(Env.flowConditionImplies(FooVal->formula())); + EXPECT_FALSE(Env.proves(FooVal->formula())); }; std::string Code = R"( diff --git a/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp index f8897929a59cf4..362b0dea58d6b8 100644 --- a/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp @@ -157,44 +157,44 @@ void transferBinary(const BinaryOperator *BO, const MatchFinder::MatchResult &M, switch (BO->getOpcode()) { case BO_GT: // pos > pos - State.Env.addToFlowCondition( + State.Env.assume( A.makeImplies(*Comp, A.makeImplies(RHSProps.Pos->formula(), LHSProps.Pos->formula()))); // pos > zero - State.Env.addToFlowCondition( + State.Env.assume( A.makeImplies(*Comp, A.makeImplies(RHSProps.Zero->formula(), LHSProps.Pos->formula()))); break; case BO_LT: // neg < neg - State.Env.addToFlowCondition( + State.Env.assume( A.makeImplies(*Comp, A.makeImplies(RHSProps.Neg->formula(), LHSProps.Neg->formula()))); // neg < zero - State.Env.addToFlowCondition( + State.Env.assume( A.makeImplies(*Comp, A.makeImplies(RHSProps.Zero->formula(), LHSProps.Neg->formula()))); break; case BO_GE: // pos >= pos - State.Env.addToFlowCondition( + State.Env.assume( A.makeImplies(*Comp, A.makeImplies(RHSProps.Pos->formula(), LHSProps.Pos->formula()))); break; case BO_LE: // neg <= neg - State.Env.addToFlowCondition( + State.Env.assume( A.makeImplies(*Comp, A.makeImplies(RHSProps.Neg->formula(), LHSProps.Neg->formula()))); break; case BO_EQ: - State.Env.addToFlowCondition( + State.Env.assume( A.makeImplies(*Comp, A.makeImplies(RHSProps.Neg->formula(), LHSProps.Neg->formula()))); - State.Env.addToFlowCondition( + State.Env.assume( A.makeImplies(*Comp, A.makeImplies(RHSProps.Zero->formula(), LHSProps.Zero->formula()))); - State.Env.addToFlowCondition( + State.Env.assume( A.makeImplies(*Comp, A.makeImplies(RHSProps.Pos->formula(), LHSProps.Pos->formula()))); break; @@ -215,14 
+215,14 @@ void transferUnaryMinus(const UnaryOperator *UO, return; // a is pos ==> -a is neg - State.Env.addToFlowCondition( + State.Env.assume( A.makeImplies(OperandProps.Pos->formula(), UnaryOpProps.Neg->formula())); // a is neg ==> -a is pos - State.Env.addToFlowCondition( + State.Env.assume( A.makeImplies(OperandProps.Neg->formula(), UnaryOpProps.Pos->formula())); // a is zero ==> -a is zero - State.Env.addToFlowCondition(A.makeImplies(OperandProps.Zero->formula(), - UnaryOpProps.Zero->formula())); + State.Env.assume(A.makeImplies(OperandProps.Zero->formula(), + UnaryOpProps.Zero->formula())); } void transferUnaryNot(const UnaryOperator *UO, @@ -235,7 +235,7 @@ void transferUnaryNot(const UnaryOperator *UO, return; // a is neg or pos ==> !a is zero - State.Env.addToFlowCondition(A.makeImplies( + State.Env.assume(A.makeImplies( A.makeOr(OperandProps.Pos->formula(), OperandProps.Neg->formula()), UnaryOpProps.Zero->formula())); @@ -243,11 +243,11 @@ void transferUnaryNot(const UnaryOperator *UO, // put the generic handler, transferExpr maybe? if (auto *UOBoolVal = dyn_cast(UnaryOpValue)) { // !a <==> a is zero - State.Env.addToFlowCondition( + State.Env.assume( A.makeEquals(UOBoolVal->formula(), OperandProps.Zero->formula())); // !a <==> !a is not zero - State.Env.addToFlowCondition(A.makeEquals( - UOBoolVal->formula(), A.makeNot(UnaryOpProps.Zero->formula()))); + State.Env.assume(A.makeEquals(UOBoolVal->formula(), + A.makeNot(UnaryOpProps.Zero->formula()))); } } @@ -391,11 +391,10 @@ BoolValue &mergeBoolValues(BoolValue &Bool1, const Environment &Env1, // path taken - this simplifies the flow condition tracked in `MergedEnv`. // Otherwise, information about which path was taken is used to associate // `MergedBool` with `Bool1` and `Bool2`. - if (Env1.flowConditionImplies(B1) && Env2.flowConditionImplies(B2)) { - MergedEnv.addToFlowCondition(MergedBool.formula()); - } else if (Env1.flowConditionImplies(A.makeNot(B1)) && - Env2.flowConditionImplies(A.makeNot(B2))) { - MergedEnv.addToFlowCondition(A.makeNot(MergedBool.formula())); + if (Env1.proves(B1) && Env2.proves(B2)) { + MergedEnv.assume(MergedBool.formula()); + } else if (Env1.proves(A.makeNot(B1)) && Env2.proves(A.makeNot(B2))) { + MergedEnv.assume(A.makeNot(MergedBool.formula())); } return MergedBool; } @@ -484,7 +483,7 @@ testing::AssertionResult isPropertyImplied(const Environment &Env, if (!Prop) return Result; auto *BVProp = cast(Prop); - if (Env.flowConditionImplies(BVProp->formula()) != Implies) + if (Env.proves(BVProp->formula()) != Implies) return testing::AssertionFailure() << Property << " is " << (Implies ? "not" : "") << " implied" << ", but should " << (Implies ? 
"" : "not ") << "be"; diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 0c2106777560ee..0f9f13df817075 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -3795,11 +3795,10 @@ TEST(TransferTest, BooleanEquality) { ASSERT_THAT(BarDecl, NotNull()); auto &BarValThen = getFormula(*BarDecl, EnvThen); - EXPECT_TRUE(EnvThen.flowConditionImplies(BarValThen)); + EXPECT_TRUE(EnvThen.proves(BarValThen)); auto &BarValElse = getFormula(*BarDecl, EnvElse); - EXPECT_TRUE( - EnvElse.flowConditionImplies(EnvElse.arena().makeNot(BarValElse))); + EXPECT_TRUE(EnvElse.proves(EnvElse.arena().makeNot(BarValElse))); }); } @@ -3830,11 +3829,10 @@ TEST(TransferTest, BooleanInequality) { ASSERT_THAT(BarDecl, NotNull()); auto &BarValThen = getFormula(*BarDecl, EnvThen); - EXPECT_TRUE( - EnvThen.flowConditionImplies(EnvThen.arena().makeNot(BarValThen))); + EXPECT_TRUE(EnvThen.proves(EnvThen.arena().makeNot(BarValThen))); auto &BarValElse = getFormula(*BarDecl, EnvElse); - EXPECT_TRUE(EnvElse.flowConditionImplies(BarValElse)); + EXPECT_TRUE(EnvElse.proves(BarValElse)); }); } @@ -3853,7 +3851,7 @@ TEST(TransferTest, IntegerLiteralEquality) { auto &Equal = getValueForDecl(ASTCtx, Env, "equal").formula(); - EXPECT_TRUE(Env.flowConditionImplies(Equal)); + EXPECT_TRUE(Env.proves(Equal)); }); } @@ -3890,19 +3888,19 @@ TEST(TransferTest, CorrelatedBranches) { ASSERT_THAT(BDecl, NotNull()); auto &BVal = getFormula(*BDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(Env.arena().makeNot(BVal))); + EXPECT_TRUE(Env.proves(Env.arena().makeNot(BVal))); } { const Environment &Env = getEnvironmentAtAnnotation(Results, "p1"); auto &CVal = getFormula(*CDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(CVal)); + EXPECT_TRUE(Env.proves(CVal)); } { const Environment &Env = getEnvironmentAtAnnotation(Results, "p2"); auto &CVal = getFormula(*CDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(CVal)); + EXPECT_TRUE(Env.proves(CVal)); } }); } @@ -3934,7 +3932,7 @@ TEST(TransferTest, LoopWithAssignmentConverges) { ASSERT_THAT(BarDecl, NotNull()); auto &BarVal = getFormula(*BarDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(Env.arena().makeNot(BarVal))); + EXPECT_TRUE(Env.proves(Env.arena().makeNot(BarVal))); }); } @@ -3967,12 +3965,11 @@ TEST(TransferTest, LoopWithStagedAssignments) { auto &BarVal = getFormula(*BarDecl, Env); auto &ErrVal = getFormula(*ErrDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(BarVal)); + EXPECT_TRUE(Env.proves(BarVal)); // An unsound analysis, for example only evaluating the loop once, can // conclude that `Err` is false. So, we test that this conclusion is not // reached. 
- EXPECT_FALSE( - Env.flowConditionImplies(Env.arena().makeNot(ErrVal))); + EXPECT_FALSE(Env.proves(Env.arena().makeNot(ErrVal))); }); } @@ -4002,7 +3999,7 @@ TEST(TransferTest, LoopWithReferenceAssignmentConverges) { ASSERT_THAT(BarDecl, NotNull()); auto &BarVal = getFormula(*BarDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(Env.arena().makeNot(BarVal))); + EXPECT_TRUE(Env.proves(Env.arena().makeNot(BarVal))); }); } @@ -4531,11 +4528,10 @@ TEST(TransferTest, IfStmtBranchExtendsFlowCondition) { ASSERT_THAT(FooDecl, NotNull()); auto &ThenFooVal= getFormula(*FooDecl, ThenEnv); - EXPECT_TRUE(ThenEnv.flowConditionImplies(ThenFooVal)); + EXPECT_TRUE(ThenEnv.proves(ThenFooVal)); auto &ElseFooVal = getFormula(*FooDecl, ElseEnv); - EXPECT_TRUE( - ElseEnv.flowConditionImplies(ElseEnv.arena().makeNot(ElseFooVal))); + EXPECT_TRUE(ElseEnv.proves(ElseEnv.arena().makeNot(ElseFooVal))); }); } @@ -4565,11 +4561,11 @@ TEST(TransferTest, WhileStmtBranchExtendsFlowCondition) { ASSERT_THAT(FooDecl, NotNull()); auto &LoopBodyFooVal = getFormula(*FooDecl, LoopBodyEnv); - EXPECT_TRUE(LoopBodyEnv.flowConditionImplies(LoopBodyFooVal)); + EXPECT_TRUE(LoopBodyEnv.proves(LoopBodyFooVal)); auto &AfterLoopFooVal = getFormula(*FooDecl, AfterLoopEnv); - EXPECT_TRUE(AfterLoopEnv.flowConditionImplies( - AfterLoopEnv.arena().makeNot(AfterLoopFooVal))); + EXPECT_TRUE( + AfterLoopEnv.proves(AfterLoopEnv.arena().makeNot(AfterLoopFooVal))); }); } @@ -4606,15 +4602,13 @@ TEST(TransferTest, DoWhileStmtBranchExtendsFlowCondition) { auto &LoopBodyFooVal= getFormula(*FooDecl, LoopBodyEnv); auto &LoopBodyBarVal = getFormula(*BarDecl, LoopBodyEnv); - EXPECT_TRUE(LoopBodyEnv.flowConditionImplies( - A.makeOr(LoopBodyBarVal, LoopBodyFooVal))); + EXPECT_TRUE( + LoopBodyEnv.proves(A.makeOr(LoopBodyBarVal, LoopBodyFooVal))); auto &AfterLoopFooVal = getFormula(*FooDecl, AfterLoopEnv); auto &AfterLoopBarVal = getFormula(*BarDecl, AfterLoopEnv); - EXPECT_TRUE( - AfterLoopEnv.flowConditionImplies(A.makeNot(AfterLoopFooVal))); - EXPECT_TRUE( - AfterLoopEnv.flowConditionImplies(A.makeNot(AfterLoopBarVal))); + EXPECT_TRUE(AfterLoopEnv.proves(A.makeNot(AfterLoopFooVal))); + EXPECT_TRUE(AfterLoopEnv.proves(A.makeNot(AfterLoopBarVal))); }); } @@ -4644,11 +4638,11 @@ TEST(TransferTest, ForStmtBranchExtendsFlowCondition) { ASSERT_THAT(FooDecl, NotNull()); auto &LoopBodyFooVal= getFormula(*FooDecl, LoopBodyEnv); - EXPECT_TRUE(LoopBodyEnv.flowConditionImplies(LoopBodyFooVal)); + EXPECT_TRUE(LoopBodyEnv.proves(LoopBodyFooVal)); auto &AfterLoopFooVal = getFormula(*FooDecl, AfterLoopEnv); - EXPECT_TRUE(AfterLoopEnv.flowConditionImplies( - AfterLoopEnv.arena().makeNot(AfterLoopFooVal))); + EXPECT_TRUE( + AfterLoopEnv.proves(AfterLoopEnv.arena().makeNot(AfterLoopFooVal))); }); } @@ -4673,7 +4667,7 @@ TEST(TransferTest, ForStmtBranchWithoutConditionDoesNotExtendFlowCondition) { ASSERT_THAT(FooDecl, NotNull()); auto &LoopBodyFooVal= getFormula(*FooDecl, LoopBodyEnv); - EXPECT_FALSE(LoopBodyEnv.flowConditionImplies(LoopBodyFooVal)); + EXPECT_FALSE(LoopBodyEnv.proves(LoopBodyFooVal)); }); } @@ -4699,8 +4693,8 @@ TEST(TransferTest, ContextSensitiveOptionDisabled) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_FALSE(Env.flowConditionImplies(FooVal)); - EXPECT_FALSE(Env.flowConditionImplies(Env.arena().makeNot(FooVal))); + EXPECT_FALSE(Env.proves(FooVal)); + EXPECT_FALSE(Env.proves(Env.arena().makeNot(FooVal))); }, {BuiltinOptions{/*.ContextSensitiveOpts=*/std::nullopt}}); } @@ -4838,8 +4832,8 @@ TEST(TransferTest, 
ContextSensitiveDepthZero) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_FALSE(Env.flowConditionImplies(FooVal)); - EXPECT_FALSE(Env.flowConditionImplies(Env.arena().makeNot(FooVal))); + EXPECT_FALSE(Env.proves(FooVal)); + EXPECT_FALSE(Env.proves(Env.arena().makeNot(FooVal))); }, {BuiltinOptions{ContextSensitiveOptions{/*.Depth=*/0}}}); } @@ -4866,7 +4860,7 @@ TEST(TransferTest, ContextSensitiveSetTrue) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -4893,7 +4887,7 @@ TEST(TransferTest, ContextSensitiveSetFalse) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(Env.arena().makeNot(FooVal))); + EXPECT_TRUE(Env.proves(Env.arena().makeNot(FooVal))); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -4926,12 +4920,12 @@ TEST(TransferTest, ContextSensitiveSetBothTrueAndFalse) { ASSERT_THAT(BarDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); - EXPECT_FALSE(Env.flowConditionImplies(A.makeNot(FooVal))); + EXPECT_TRUE(Env.proves(FooVal)); + EXPECT_FALSE(Env.proves(A.makeNot(FooVal))); auto &BarVal = getFormula(*BarDecl, Env); - EXPECT_FALSE(Env.flowConditionImplies(BarVal)); - EXPECT_TRUE(Env.flowConditionImplies(A.makeNot(BarVal))); + EXPECT_FALSE(Env.proves(BarVal)); + EXPECT_TRUE(Env.proves(A.makeNot(BarVal))); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -4959,8 +4953,8 @@ TEST(TransferTest, ContextSensitiveSetTwoLayersDepthOne) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_FALSE(Env.flowConditionImplies(FooVal)); - EXPECT_FALSE(Env.flowConditionImplies(Env.arena().makeNot(FooVal))); + EXPECT_FALSE(Env.proves(FooVal)); + EXPECT_FALSE(Env.proves(Env.arena().makeNot(FooVal))); }, {BuiltinOptions{ContextSensitiveOptions{/*.Depth=*/1}}}); } @@ -4988,7 +4982,7 @@ TEST(TransferTest, ContextSensitiveSetTwoLayersDepthTwo) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }, {BuiltinOptions{ContextSensitiveOptions{/*.Depth=*/2}}}); } @@ -5017,8 +5011,8 @@ TEST(TransferTest, ContextSensitiveSetThreeLayersDepthTwo) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_FALSE(Env.flowConditionImplies(FooVal)); - EXPECT_FALSE(Env.flowConditionImplies(Env.arena().makeNot(FooVal))); + EXPECT_FALSE(Env.proves(FooVal)); + EXPECT_FALSE(Env.proves(Env.arena().makeNot(FooVal))); }, {BuiltinOptions{ContextSensitiveOptions{/*.Depth=*/2}}}); } @@ -5047,7 +5041,7 @@ TEST(TransferTest, ContextSensitiveSetThreeLayersDepthThree) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }, {BuiltinOptions{ContextSensitiveOptions{/*.Depth=*/3}}}); } @@ -5090,8 +5084,8 @@ TEST(TransferTest, ContextSensitiveMutualRecursion) { auto &FooVal = getFormula(*FooDecl, Env); // ... but it also can't prove anything here. 
- EXPECT_FALSE(Env.flowConditionImplies(FooVal)); - EXPECT_FALSE(Env.flowConditionImplies(Env.arena().makeNot(FooVal))); + EXPECT_FALSE(Env.proves(FooVal)); + EXPECT_FALSE(Env.proves(Env.arena().makeNot(FooVal))); }, {BuiltinOptions{ContextSensitiveOptions{/*.Depth=*/4}}}); } @@ -5124,12 +5118,12 @@ TEST(TransferTest, ContextSensitiveSetMultipleLines) { ASSERT_THAT(BarDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); - EXPECT_FALSE(Env.flowConditionImplies(Env.arena().makeNot(FooVal))); + EXPECT_TRUE(Env.proves(FooVal)); + EXPECT_FALSE(Env.proves(Env.arena().makeNot(FooVal))); auto &BarVal = getFormula(*BarDecl, Env); - EXPECT_FALSE(Env.flowConditionImplies(BarVal)); - EXPECT_TRUE(Env.flowConditionImplies(Env.arena().makeNot(BarVal))); + EXPECT_FALSE(Env.proves(BarVal)); + EXPECT_TRUE(Env.proves(Env.arena().makeNot(BarVal))); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5166,12 +5160,12 @@ TEST(TransferTest, ContextSensitiveSetMultipleBlocks) { ASSERT_THAT(BazDecl, NotNull()); auto &BarVal = getFormula(*BarDecl, Env); - EXPECT_FALSE(Env.flowConditionImplies(BarVal)); - EXPECT_TRUE(Env.flowConditionImplies(Env.arena().makeNot(BarVal))); + EXPECT_FALSE(Env.proves(BarVal)); + EXPECT_TRUE(Env.proves(Env.arena().makeNot(BarVal))); auto &BazVal = getFormula(*BazDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(BazVal)); - EXPECT_FALSE(Env.flowConditionImplies(Env.arena().makeNot(BazVal))); + EXPECT_TRUE(Env.proves(BazVal)); + EXPECT_FALSE(Env.proves(Env.arena().makeNot(BazVal))); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5215,7 +5209,7 @@ TEST(TransferTest, ContextSensitiveReturnTrue) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5240,7 +5234,7 @@ TEST(TransferTest, ContextSensitiveReturnFalse) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(Env.arena().makeNot(FooVal))); + EXPECT_TRUE(Env.proves(Env.arena().makeNot(FooVal))); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5268,7 +5262,7 @@ TEST(TransferTest, ContextSensitiveReturnArg) { ASSERT_THAT(BazDecl, NotNull()); auto &BazVal = getFormula(*BazDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(BazVal)); + EXPECT_TRUE(Env.proves(BazVal)); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5316,7 +5310,7 @@ TEST(TransferTest, ContextSensitiveMethodLiteral) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5348,7 +5342,7 @@ TEST(TransferTest, ContextSensitiveMethodGetter) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5380,7 +5374,7 @@ TEST(TransferTest, ContextSensitiveMethodSetter) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5414,7 +5408,7 @@ TEST(TransferTest, ContextSensitiveMethodGetterAndSetter) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - 
EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5449,7 +5443,7 @@ TEST(TransferTest, ContextSensitiveMethodTwoLayersVoid) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5483,7 +5477,7 @@ TEST(TransferTest, ContextSensitiveMethodTwoLayersReturn) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5514,7 +5508,7 @@ TEST(TransferTest, ContextSensitiveConstructorBody) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5545,7 +5539,7 @@ TEST(TransferTest, ContextSensitiveConstructorInitializer) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5576,7 +5570,7 @@ TEST(TransferTest, ContextSensitiveConstructorDefault) { ASSERT_THAT(FooDecl, NotNull()); auto &FooVal = getFormula(*FooDecl, Env); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }, {BuiltinOptions{ContextSensitiveOptions{}}}); } @@ -5656,7 +5650,7 @@ TEST(TransferTest, ChainedLogicalOps) { ASTContext &ASTCtx) { const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); auto &B = getValueForDecl(ASTCtx, Env, "b").formula(); - EXPECT_TRUE(Env.flowConditionImplies(B)); + EXPECT_TRUE(Env.proves(B)); }); } @@ -5701,30 +5695,30 @@ TEST(TransferTest, NoReturnFunctionInsideShortCircuitedBooleanOp) { auto &A = Env.arena(); // Check that [[p]] is reachable with a non-false flow condition. 
- EXPECT_FALSE(Env.flowConditionImplies(A.makeLiteral(false))); + EXPECT_FALSE(Env.proves(A.makeLiteral(false))); auto &B1 = getValueForDecl(ASTCtx, Env, "b1").formula(); - EXPECT_TRUE(Env.flowConditionImplies(A.makeNot(B1))); + EXPECT_TRUE(Env.proves(A.makeNot(B1))); auto &NoreturnOnRhsOfAnd = getValueForDecl(ASTCtx, Env, "NoreturnOnRhsOfAnd").formula(); - EXPECT_TRUE(Env.flowConditionImplies(A.makeNot(NoreturnOnRhsOfAnd))); + EXPECT_TRUE(Env.proves(A.makeNot(NoreturnOnRhsOfAnd))); auto &B2 = getValueForDecl(ASTCtx, Env, "b2").formula(); - EXPECT_TRUE(Env.flowConditionImplies(B2)); + EXPECT_TRUE(Env.proves(B2)); auto &NoreturnOnRhsOfOr = getValueForDecl(ASTCtx, Env, "NoreturnOnRhsOfOr") .formula(); - EXPECT_TRUE(Env.flowConditionImplies(NoreturnOnRhsOfOr)); + EXPECT_TRUE(Env.proves(NoreturnOnRhsOfOr)); auto &NoreturnOnLhsMakesAndUnreachable = getValueForDecl( ASTCtx, Env, "NoreturnOnLhsMakesAndUnreachable").formula(); - EXPECT_TRUE(Env.flowConditionImplies(NoreturnOnLhsMakesAndUnreachable)); + EXPECT_TRUE(Env.proves(NoreturnOnLhsMakesAndUnreachable)); auto &NoreturnOnLhsMakesOrUnreachable = getValueForDecl( ASTCtx, Env, "NoreturnOnLhsMakesOrUnreachable").formula(); - EXPECT_TRUE(Env.flowConditionImplies(NoreturnOnLhsMakesOrUnreachable)); + EXPECT_TRUE(Env.proves(NoreturnOnLhsMakesOrUnreachable)); }); } @@ -5944,7 +5938,7 @@ TEST(TransferTest, AnonymousStruct) { S->getChild(*cast(IndirectField->chain().front()))); auto *B = cast(getFieldValue(&AnonStruct, *BDecl, Env)); - ASSERT_TRUE(Env.flowConditionImplies(B->formula())); + ASSERT_TRUE(Env.proves(B->formula())); }); } @@ -5975,7 +5969,7 @@ TEST(TransferTest, AnonymousStructWithInitializer) { *cast(IndirectField->chain().front()))); auto *B = cast(getFieldValue(&AnonStruct, *BDecl, Env)); - ASSERT_TRUE(Env.flowConditionImplies(B->formula())); + ASSERT_TRUE(Env.proves(B->formula())); }); } diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp index 8422f3804db549..e33bea47137ad7 100644 --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -450,8 +450,7 @@ class SpecialBoolAnalysis final if (IsSet2 == nullptr) return ComparisonResult::Different; - return Env1.flowConditionImplies(IsSet1->formula()) == - Env2.flowConditionImplies(IsSet2->formula()) + return Env1.proves(IsSet1->formula()) == Env2.proves(IsSet2->formula()) ? 
ComparisonResult::Same : ComparisonResult::Different; } @@ -475,9 +474,8 @@ class SpecialBoolAnalysis final auto &IsSet = MergedEnv.makeAtomicBoolValue(); MergedVal.setProperty("is_set", IsSet); - if (Env1.flowConditionImplies(IsSet1->formula()) && - Env2.flowConditionImplies(IsSet2->formula())) - MergedEnv.addToFlowCondition(IsSet.formula()); + if (Env1.proves(IsSet1->formula()) && Env2.proves(IsSet2->formula())) + MergedEnv.assume(IsSet.formula()); return true; } @@ -544,10 +542,10 @@ TEST_F(JoinFlowConditionsTest, JoinDistinctButProvablyEquivalentValues) { ->formula(); }; - EXPECT_FALSE(Env1.flowConditionImplies(GetFoo(Env1))); - EXPECT_TRUE(Env2.flowConditionImplies(GetFoo(Env2))); - EXPECT_TRUE(Env3.flowConditionImplies(GetFoo(Env3))); - EXPECT_TRUE(Env4.flowConditionImplies(GetFoo(Env4))); + EXPECT_FALSE(Env1.proves(GetFoo(Env1))); + EXPECT_TRUE(Env2.proves(GetFoo(Env2))); + EXPECT_TRUE(Env3.proves(GetFoo(Env3))); + EXPECT_TRUE(Env4.proves(GetFoo(Env4))); }); } @@ -849,11 +847,11 @@ TEST_F(FlowConditionTest, IfStmtSingleVar) { const Environment &Env1 = getEnvironmentAtAnnotation(Results, "p1"); auto &FooVal1 = cast(Env1.getValue(*FooDecl))->formula(); - EXPECT_TRUE(Env1.flowConditionImplies(FooVal1)); + EXPECT_TRUE(Env1.proves(FooVal1)); const Environment &Env2 = getEnvironmentAtAnnotation(Results, "p2"); auto &FooVal2 = cast(Env2.getValue(*FooDecl))->formula(); - EXPECT_FALSE(Env2.flowConditionImplies(FooVal2)); + EXPECT_FALSE(Env2.proves(FooVal2)); }); } @@ -880,11 +878,11 @@ TEST_F(FlowConditionTest, IfStmtSingleNegatedVar) { const Environment &Env1 = getEnvironmentAtAnnotation(Results, "p1"); auto &FooVal1 = cast(Env1.getValue(*FooDecl))->formula(); - EXPECT_FALSE(Env1.flowConditionImplies(FooVal1)); + EXPECT_FALSE(Env1.proves(FooVal1)); const Environment &Env2 = getEnvironmentAtAnnotation(Results, "p2"); auto &FooVal2 = cast(Env2.getValue(*FooDecl))->formula(); - EXPECT_TRUE(Env2.flowConditionImplies(FooVal2)); + EXPECT_TRUE(Env2.proves(FooVal2)); }); } @@ -908,7 +906,7 @@ TEST_F(FlowConditionTest, WhileStmt) { const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); auto &FooVal = cast(Env.getValue(*FooDecl))->formula(); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }); } @@ -931,7 +929,7 @@ TEST_F(FlowConditionTest, WhileStmtWithAssignmentInCondition) { ASTContext &ASTCtx) { const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); auto &FooVal = getValueForDecl(ASTCtx, Env, "Foo").formula(); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }); } @@ -961,14 +959,14 @@ TEST_F(FlowConditionTest, Conjunction) { const Environment &Env1 = getEnvironmentAtAnnotation(Results, "p1"); auto &FooVal1 = cast(Env1.getValue(*FooDecl))->formula(); auto &BarVal1 = cast(Env1.getValue(*BarDecl))->formula(); - EXPECT_TRUE(Env1.flowConditionImplies(FooVal1)); - EXPECT_TRUE(Env1.flowConditionImplies(BarVal1)); + EXPECT_TRUE(Env1.proves(FooVal1)); + EXPECT_TRUE(Env1.proves(BarVal1)); const Environment &Env2 = getEnvironmentAtAnnotation(Results, "p2"); auto &FooVal2 = cast(Env2.getValue(*FooDecl))->formula(); auto &BarVal2 = cast(Env2.getValue(*BarDecl))->formula(); - EXPECT_FALSE(Env2.flowConditionImplies(FooVal2)); - EXPECT_FALSE(Env2.flowConditionImplies(BarVal2)); + EXPECT_FALSE(Env2.proves(FooVal2)); + EXPECT_FALSE(Env2.proves(BarVal2)); }); } @@ -998,14 +996,14 @@ TEST_F(FlowConditionTest, Disjunction) { const Environment &Env1 = getEnvironmentAtAnnotation(Results, "p1"); auto &FooVal1 = 
cast(Env1.getValue(*FooDecl))->formula(); auto &BarVal1 = cast(Env1.getValue(*BarDecl))->formula(); - EXPECT_FALSE(Env1.flowConditionImplies(FooVal1)); - EXPECT_FALSE(Env1.flowConditionImplies(BarVal1)); + EXPECT_FALSE(Env1.proves(FooVal1)); + EXPECT_FALSE(Env1.proves(BarVal1)); const Environment &Env2 = getEnvironmentAtAnnotation(Results, "p2"); auto &FooVal2 = cast(Env2.getValue(*FooDecl))->formula(); auto &BarVal2 = cast(Env2.getValue(*BarDecl))->formula(); - EXPECT_FALSE(Env2.flowConditionImplies(FooVal2)); - EXPECT_FALSE(Env2.flowConditionImplies(BarVal2)); + EXPECT_FALSE(Env2.proves(FooVal2)); + EXPECT_FALSE(Env2.proves(BarVal2)); }); } @@ -1035,14 +1033,14 @@ TEST_F(FlowConditionTest, NegatedConjunction) { const Environment &Env1 = getEnvironmentAtAnnotation(Results, "p1"); auto &FooVal1 = cast(Env1.getValue(*FooDecl))->formula(); auto &BarVal1 = cast(Env1.getValue(*BarDecl))->formula(); - EXPECT_FALSE(Env1.flowConditionImplies(FooVal1)); - EXPECT_FALSE(Env1.flowConditionImplies(BarVal1)); + EXPECT_FALSE(Env1.proves(FooVal1)); + EXPECT_FALSE(Env1.proves(BarVal1)); const Environment &Env2 = getEnvironmentAtAnnotation(Results, "p2"); auto &FooVal2 = cast(Env2.getValue(*FooDecl))->formula(); auto &BarVal2 = cast(Env2.getValue(*BarDecl))->formula(); - EXPECT_TRUE(Env2.flowConditionImplies(FooVal2)); - EXPECT_TRUE(Env2.flowConditionImplies(BarVal2)); + EXPECT_TRUE(Env2.proves(FooVal2)); + EXPECT_TRUE(Env2.proves(BarVal2)); }); } @@ -1072,14 +1070,14 @@ TEST_F(FlowConditionTest, DeMorgan) { const Environment &Env1 = getEnvironmentAtAnnotation(Results, "p1"); auto &FooVal1 = cast(Env1.getValue(*FooDecl))->formula(); auto &BarVal1 = cast(Env1.getValue(*BarDecl))->formula(); - EXPECT_TRUE(Env1.flowConditionImplies(FooVal1)); - EXPECT_TRUE(Env1.flowConditionImplies(BarVal1)); + EXPECT_TRUE(Env1.proves(FooVal1)); + EXPECT_TRUE(Env1.proves(BarVal1)); const Environment &Env2 = getEnvironmentAtAnnotation(Results, "p2"); auto &FooVal2 = cast(Env2.getValue(*FooDecl))->formula(); auto &BarVal2 = cast(Env2.getValue(*BarDecl))->formula(); - EXPECT_FALSE(Env2.flowConditionImplies(FooVal2)); - EXPECT_FALSE(Env2.flowConditionImplies(BarVal2)); + EXPECT_FALSE(Env2.proves(FooVal2)); + EXPECT_FALSE(Env2.proves(BarVal2)); }); } @@ -1108,7 +1106,7 @@ TEST_F(FlowConditionTest, Join) { const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); auto &FooVal = cast(Env.getValue(*FooDecl))->formula(); - EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + EXPECT_TRUE(Env.proves(FooVal)); }); } @@ -1142,7 +1140,7 @@ TEST_F(FlowConditionTest, OpaqueFlowConditionMergesToOpaqueBool) { auto &BarVal = cast(Env.getValue(*BarDecl))->formula(); - EXPECT_FALSE(Env.flowConditionImplies(BarVal)); + EXPECT_FALSE(Env.proves(BarVal)); }); } @@ -1183,7 +1181,7 @@ TEST_F(FlowConditionTest, OpaqueFieldFlowConditionMergesToOpaqueBool) { auto &BarVal = cast(Env.getValue(*BarDecl))->formula(); - EXPECT_FALSE(Env.flowConditionImplies(BarVal)); + EXPECT_FALSE(Env.proves(BarVal)); }); } @@ -1217,7 +1215,7 @@ TEST_F(FlowConditionTest, OpaqueFlowConditionInsideBranchMergesToOpaqueBool) { auto &BarVal = cast(Env.getValue(*BarDecl))->formula(); - EXPECT_FALSE(Env.flowConditionImplies(BarVal)); + EXPECT_FALSE(Env.proves(BarVal)); }); } @@ -1245,11 +1243,11 @@ TEST_F(FlowConditionTest, PointerToBoolImplicitCast) { const Environment &Env1 = getEnvironmentAtAnnotation(Results, "p1"); auto &FooVal1 = cast(Env1.getValue(*FooDecl))->formula(); - EXPECT_TRUE(Env1.flowConditionImplies(FooVal1)); + EXPECT_TRUE(Env1.proves(FooVal1)); const 
Environment &Env2 = getEnvironmentAtAnnotation(Results, "p2"); auto &FooVal2 = cast(Env2.getValue(*FooDecl))->formula(); - EXPECT_FALSE(Env2.flowConditionImplies(FooVal2)); + EXPECT_FALSE(Env2.proves(FooVal2)); }); } @@ -1585,7 +1583,7 @@ TEST_F(TopTest, TopUsedInBothBranchesWithoutPrecisionLoss) { auto *BarVal = dyn_cast_or_null(Env.getValue(*BarDecl)); ASSERT_THAT(BarVal, NotNull()); - EXPECT_TRUE(Env.flowConditionImplies( + EXPECT_TRUE(Env.proves( Env.arena().makeEquals(FooVal->formula(), BarVal->formula()))); }); } From 432649700db1bcfd5c991296242195129f03b4b1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 30 Oct 2023 12:43:05 +0000 Subject: [PATCH 012/144] [X86] vec_insert-5.ll - ensure we build with +mmx as we reference x86_mmx types Enabling SSE doesn't guarantee MMX is enabled on all targets Avoids a crash in D152928 (although we still currently see a regression with that patch applied resulting in MMX codegen) --- llvm/test/CodeGen/X86/vec_insert-5.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/X86/vec_insert-5.ll b/llvm/test/CodeGen/X86/vec_insert-5.ll index be155969e0b5e2..34280aa647aab7 100644 --- a/llvm/test/CodeGen/X86/vec_insert-5.ll +++ b/llvm/test/CodeGen/X86/vec_insert-5.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefixes=X64,ALIGN -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3,sse-unaligned-mem | FileCheck %s --check-prefixes=X64,UNALIGN +; RUN: llc < %s -mtriple=i386-unknown -mattr=+mmx,+sse2,+ssse3 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2,+ssse3 | FileCheck %s --check-prefixes=X64,ALIGN +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2,+ssse3,sse-unaligned-mem | FileCheck %s --check-prefixes=X64,UNALIGN ; There are no MMX operations in @t1 From 70904226e12f78344a1c6abfff54fb490e1de988 Mon Sep 17 00:00:00 2001 From: Igor Kirillov Date: Mon, 30 Oct 2023 13:43:26 +0000 Subject: [PATCH 013/144] [LoopVectorize] Enhance Vectorization decisions for predicate tail-folded loops with low trip counts (#69588) * Avoid using `CM_ScalarEpilogueNotAllowedLowTripLoop` for loops known to be predicate tail-folded, delegating to `areRuntimeChecksProfitable` to decide on the profitability of vectorizing loops with runtime checks. * Update the `areRuntimeChecksProfitable` function to consider the `ScalarEpilogueLowering` setting when assessing vectorization of a loop. With this patch, we can make more informed decisions for loops with low trip counts, especially when leveraging Profile-Guided Optimization (PGO) data. 
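For illustration (this example is not part of the patch; the function and the assumed profile are hypothetical), the kind of loop this change targets looks like the following C++ sketch: the store through c may alias the loads through a and b, so vectorization needs runtime alias checks, while profile data says the loop usually runs only a handful of iterations.

  // Hypothetical example: PGO branch weights suggest n is usually ~10, and
  // vectorizing requires runtime checks that c aliases neither a nor b.
  void add_bytes(const unsigned char *a, const unsigned char *b,
                 unsigned char *c, int n) {
    for (int i = 0; i < n; ++i)
      c[i] = a[i] + b[i];
  }

Previously the low expected trip count forced CM_ScalarEpilogueNotAllowedLowTripLoop and blocked vectorization of such a loop outright; with this change, when predicated tail folding is requested, areRuntimeChecksProfitable weighs the runtime-check cost against the expected trip count instead.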
--- .../Transforms/Vectorize/LoopVectorize.cpp | 24 ++-- .../runtime-check-trip-count-decisions.ll | 108 ++++++++++++++++++ 2 files changed, 125 insertions(+), 7 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-trip-count-decisions.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 16c761a91ff232..4f547886f60253 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9785,7 +9785,8 @@ static void checkMixedPrecision(Loop *L, OptimizationRemarkEmitter *ORE) { static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks, VectorizationFactor &VF, std::optional<unsigned> VScale, Loop *L, - ScalarEvolution &SE) { + ScalarEvolution &SE, + ScalarEpilogueLowering SEL) { InstructionCost CheckCost = Checks.getCost(); if (!CheckCost.isValid()) return false; @@ -9855,11 +9856,13 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks, // RtC < ScalarC * TC * (1 / X) ==> RtC * X / ScalarC < TC double MinTC2 = RtC * 10 / ScalarC; - // Now pick the larger minimum. If it is not a multiple of VF, choose the - // next closest multiple of VF. This should partly compensate for ignoring - // the epilogue cost. + // Now pick the larger minimum. If it is not a multiple of VF and a scalar + // epilogue is allowed, choose the next closest multiple of VF. This should + // partly compensate for ignoring the epilogue cost. uint64_t MinTC = std::ceil(std::max(MinTC1, MinTC2)); - VF.MinProfitableTripCount = ElementCount::getFixed(alignTo(MinTC, IntVF)); + if (SEL == CM_ScalarEpilogueAllowed) + MinTC = alignTo(MinTC, IntVF); + VF.MinProfitableTripCount = ElementCount::getFixed(MinTC); LLVM_DEBUG( dbgs() << "LV: Minimum required TC for runtime checks to be profitable:" @@ -9979,7 +9982,14 @@ bool LoopVectorizePass::processLoop(Loop *L) { else { if (*ExpectedTC > TTI->getMinTripCountTailFoldingThreshold()) { LLVM_DEBUG(dbgs() << "\n"); - SEL = CM_ScalarEpilogueNotAllowedLowTripLoop; + // Predicate tail-folded loops are efficient even when the loop + // iteration count is low. However, setting the epilogue policy to + // `CM_ScalarEpilogueNotAllowedLowTripLoop` prevents vectorizing loops + // with runtime checks. It's more effective to let + // `areRuntimeChecksProfitable` determine if vectorization is beneficial + // for the loop.
+ if (SEL != CM_ScalarEpilogueNotNeededUsePredicate) + SEL = CM_ScalarEpilogueNotAllowedLowTripLoop; } else { LLVM_DEBUG(dbgs() << " But the target considers the trip count too " "small to consider vectorizing.\n"); @@ -10074,7 +10084,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { Hints.getForce() == LoopVectorizeHints::FK_Enabled; if (!ForceVectorization && !areRuntimeChecksProfitable(Checks, VF, getVScaleForTuning(L, *TTI), L, - *PSE.getSE())) { + *PSE.getSE(), SEL)) { ORE->emit([&]() { return OptimizationRemarkAnalysisAliasing( DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(), diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-trip-count-decisions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-trip-count-decisions.ll new file mode 100644 index 00000000000000..39ef5baa5b0190 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-trip-count-decisions.ll @@ -0,0 +1,108 @@ +; RUN: opt < %s -passes=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S | FileCheck --check-prefixes=CHECK,PREDICATED %s +; RUN: opt < %s -passes=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue -S | FileCheck --check-prefixes=CHECK,SCALAR %s + +; This file contains the same function but with different trip-count PGO hints + +; The function is vectorized if there are no trip-count hints +define i32 @foo_no_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) { +; CHECK-LABEL: @foo_no_trip_count( +; PREDICATED: vector.body +; SCALAR: vector.body +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %idx = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %a.index = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 %idx + %0 = load i8, ptr %a.index, align 1 + %b.index = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %idx + %1 = load i8, ptr %b.index, align 1 + %2 = add i8 %0, %1 + %c.index = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %idx + store i8 %2, ptr %c.index, align 1 + %inc = add nsw i32 %idx, 1 + %exitcond = icmp eq i32 %idx, %bound + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 0 +} + +; If trip-count is equal to 4, the function is not vectorised +define i32 @foo_low_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) { +; CHECK-LABEL: @foo_low_trip_count( +; PREDICATED-NOT: vector.body +; SCALAR-NOT: vector.body +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %idx = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %a.index = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 %idx + %0 = load i8, ptr %a.index, align 1 + %b.index = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %idx + %1 = load i8, ptr %b.index, align 1 + %2 = add i8 %0, %1 + %c.index = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %idx + store i8 %2, ptr %c.index, align 1 + %inc = add nsw i32 %idx, 1 + %exitcond = icmp eq i32 %idx, %bound + br i1 %exitcond, label %for.end, label %for.body, !prof !0 + +for.end: ; preds = %for.body + ret i32 0 +} + +; If trip-count is equal to 10, the function is vectorised when predicated tail folding is chosen +define i32 @foo_mid_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) { +; CHECK-LABEL: @foo_mid_trip_count( +; PREDICATED: vector.body +; SCALAR-NOT: vector.body +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %idx = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %a.index = getelementptr 
inbounds [32 x i8], ptr %a, i32 0, i32 %idx + %0 = load i8, ptr %a.index, align 1 + %b.index = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %idx + %1 = load i8, ptr %b.index, align 1 + %2 = add i8 %0, %1 + %c.index = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %idx + store i8 %2, ptr %c.index, align 1 + %inc = add nsw i32 %idx, 1 + %exitcond = icmp eq i32 %idx, %bound + br i1 %exitcond, label %for.end, label %for.body, !prof !1 + +for.end: ; preds = %for.body + ret i32 0 +} + +; If trip-count is equal to 40, the function is always vectorised +define i32 @foo_high_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) { +; CHECK-LABEL: @foo_high_trip_count( +; PREDICATED: vector.body +; SCALAR: vector.body +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %idx = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %a.index = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 %idx + %0 = load i8, ptr %a.index, align 1 + %b.index = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %idx + %1 = load i8, ptr %b.index, align 1 + %2 = add i8 %0, %1 + %c.index = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %idx + store i8 %2, ptr %c.index, align 1 + %inc = add nsw i32 %idx, 1 + %exitcond = icmp eq i32 %idx, %bound + br i1 %exitcond, label %for.end, label %for.body, !prof !2 + +for.end: ; preds = %for.body + ret i32 0 +} + +!0 = !{!"branch_weights", i32 10, i32 30} +!1 = !{!"branch_weights", i32 10, i32 90} +!2 = !{!"branch_weights", i32 10, i32 390} From ddd2747b7ee90026854a2957ee756dbaf3109f85 Mon Sep 17 00:00:00 2001 From: Pete Steinfeld <47540744+psteinfeld@users.noreply.github.com> Date: Mon, 30 Oct 2023 06:44:09 -0700 Subject: [PATCH 014/144] [flang] Put ISO_Fortran_binding.h where it can be easily used (#70129) The update stems from the discussion in https://discourse.llvm.org/t/adding-flang-specific-header-files-to-clang/72442 This is my third attempt at this. My second attempt was in pull request #69121, and my first attempt was in pull request #68756. This pull request has three changes from the second one: - I put the test into the Driver directory rather than Examples so that it would get run without requiring the define LLVM_BUILD_EXAMPLES. - When installing ISO_Fortran_binding.h, I changed the location where it was installed from. - I changed the test so that it would work when flang was built with shared libraries. Here's the information from my previous attempts: I decided to put ISO_Fortran_binding.h in a place where it would be accessible with the include "#include <ISO_Fortran_binding.h>" rather than "#include <flang/ISO_Fortran_binding.h>" because this is what gfortran implements. Note that the file is also installed into ".../include/flang", so if a user wanted to access the file from a compiler other than clang, it would be available. I added a test in ".../flang/test/Driver". To make the test work, I also needed to put ISO_Fortran_binding.h into the build area. Although the flang project depends on clang, clang may not always be available in a flang build. For example, when building just the "check-flang" target, the "clang" executable may not be available at the time the new test gets run. To account for this, I made the test's script check for the existence of the "clang" executable. If "clang" is not available, it simply prints "PASS". If it is available, it fully builds and executes the test.
On success, this will also print "PASS". --- flang/CMakeLists.txt | 17 ++++++++ flang/test/Driver/ctofortran.f90 | 73 ++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 flang/test/Driver/ctofortran.f90 diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index ac30da89995ed3..f81d3e33fe86c0 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -276,6 +276,9 @@ endif() set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}") +if (NOT PACKAGE_VERSION) + set(PACKAGE_VERSION ${LLVM_VERSION_MAJOR}) +endif() if (NOT DEFINED FLANG_VERSION_MAJOR) @@ -490,3 +493,17 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) PATTERN "*.inc" ) endif() + +# Put ISO_Fortran_binding.h into the include files of the build area now +# so that we can run tests before installing +include(GetClangResourceDir) +get_clang_resource_dir(HEADER_BINARY_DIR PREFIX ${LLVM_LIBRARY_OUTPUT_INTDIR}/.. SUBDIR include) +configure_file( + ${FLANG_SOURCE_DIR}/include/flang/ISO_Fortran_binding.h + ${HEADER_BINARY_DIR}/ISO_Fortran_binding.h) + +# And also install it into the install area +get_clang_resource_dir(HEADER_INSTALL_DIR SUBDIR include) +install( + FILES include/flang/ISO_Fortran_binding.h + DESTINATION ${HEADER_INSTALL_DIR} ) diff --git a/flang/test/Driver/ctofortran.f90 b/flang/test/Driver/ctofortran.f90 new file mode 100644 index 00000000000000..6483e0deb3866e --- /dev/null +++ b/flang/test/Driver/ctofortran.f90 @@ -0,0 +1,73 @@ +! UNSUPPORTED: system-windows +! RUN: split-file %s %t +! RUN: chmod +x %t/runtest.sh +! RUN: %t/runtest.sh %t %flang $t/ffile.f90 $t/cfile.c + +!--- ffile.f90 +subroutine foo(a) bind(c) + integer :: a(:) + if (lbound(a, 1) .ne. 1) then + print *, 'FAIL expected 1 for lbound but got ',lbound(a, 1) + stop 1 + endif + + if (ubound(a, 1) .ne. 10) then + print *, 'FAIL expected 10 for ubound but got ',ubound(a, 1) + stop 1 + endif + + do i = lbound(a,1),ubound(a,1) + !print *, a(i) + if (a(i) .ne. i) then + print *, 'FAIL expected', i, ' for index ',i, ' but got ',a(i) + stop 1 + endif + enddo + print *, 'PASS' +end subroutine foo + +! CHECK: PASS +!--- cfile.c +#include <ISO_Fortran_binding.h> +#include <stdio.h> +#include <stdlib.h> + +void foo(CFI_cdesc_t*); + +int a[10]; + +int main() { + int i, res; + static CFI_CDESC_T(1) r1; + CFI_cdesc_t *desc = (CFI_cdesc_t*)&r1; + CFI_index_t extent[1] = {10}; + + for(i=0; i<10; ++i) { + a[i] = i+1; + } + + res = CFI_establish(desc, (void*)a, CFI_attribute_other, CFI_type_int32_t, + sizeof(int), 1, extent); + if (res != 0) { + printf("FAIL CFI_establish returned %d instead of 0.\n",res); + exit(1); + } + + foo(desc); + return 0; +} !--- runtest.sh +#!/bin/bash +export BINDIR=`dirname $2` +export CCOMP=$BINDIR/clang +if [ -x $CCOMP ] +then + export LIBDIR=$BINDIR/../lib + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LIBDIR + $CCOMP -c $1/$4 -o $1/cfile.o + $2 $1/$3 $1/cfile.o -o $1/ctofortran + $1/ctofortran # should print "PASS" +else + # No clang compiler, just pass by default + echo "PASS" +fi From 89f8d35094518ed9734a493cf19e188c56c8d4e4 Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Mon, 30 Oct 2023 15:47:25 +0200 Subject: [PATCH 015/144] [AMDGPU] Fix subtarget predicates for some V_MFMA instructions. (#70450) Resolves AsmParser ambiguity, e.g., V_MFMA_I32_32X32X8I8_vi currently has isGFX908orGFX90A as its subtarget predicate, which makes it identical to V_MFMA_I32_32X32X8I8_gfx90a_acd on GFX90A. Part of .
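To illustrate the ambiguity (a toy C++ model only; the table layout and predicate signatures below are invented and do not mirror the generated AsmMatcher tables), two candidate encodings whose subtarget predicates overlap both match on GFX90A:

  #include <cstdio>
  #include <vector>

  // Toy sketch of predicate-gated matching; the instruction names are taken
  // from the message above, everything else here is hypothetical.
  enum Subtarget { GFX908, GFX90A };

  struct Entry {
    const char *Name;
    bool (*Predicate)(Subtarget);
  };

  static bool isGFX908orGFX90A(Subtarget ST) {
    return ST == GFX908 || ST == GFX90A;
  }
  static bool isGFX90AOnly(Subtarget ST) { return ST == GFX90A; }

  int main() {
    std::vector<Entry> Candidates = {
        {"V_MFMA_I32_32X32X8I8_vi", isGFX908orGFX90A},     // too broad
        {"V_MFMA_I32_32X32X8I8_gfx90a_acd", isGFX90AOnly},
    };
    for (const Entry &E : Candidates)
      if (E.Predicate(GFX90A))
        std::printf("matches on gfx90a: %s\n", E.Name); // both entries print
  }

Narrowing the _vi entry's predicate so that it excludes GFX90A (the TableGen change below uses isGFX8GFX9NotGFX90A) leaves exactly one matching candidate per subtarget.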
--- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 33 ++++++++++++--------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 539b69651dfedf..b4149729d50e56 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -1072,17 +1072,24 @@ multiclass VOP3P_Real_MFMA_gfx940 op, string Name = !cast(N defm : VOP3P_Real_MFMA_gfx940_aliases; } -multiclass VOP3P_Real_MFMA op, string GFX940Name = !cast(NAME#"_e64").Mnemonic> : - VOP3P_Real_MFMA_gfx90a , - VOP3P_Real_MFMA_gfx940 { +multiclass VOP3P_Real_MFMA_vi op> { def _vi : VOP3P_Real(NAME#"_e64"), SIEncodingFamily.VI>, VOP3Pe_MAI (NAME#"_e64").Pfl, ?> { + let SubtargetPredicate = isGFX8GFX9NotGFX90A; let AssemblerPredicate = HasMAIInsts; let DecoderNamespace = "GFX8"; let Constraints = ""; } } +multiclass VOP3P_Real_MFMA_vi_gfx90a op> : + VOP3P_Real_MFMA_gfx90a , + VOP3P_Real_MFMA_vi ; + +multiclass VOP3P_Real_MFMA op, string GFX940Name = !cast(NAME#"_e64").Mnemonic> : + VOP3P_Real_MFMA_vi_gfx90a , + VOP3P_Real_MFMA_gfx940 ; + multiclass VOP3P_Real_SMFMAC op, string alias> { def _gfx940 : VOP3P_Real(NAME#"_e64"), SIEncodingFamily.VI>, VOP3Pe_SMFMAC { @@ -1143,7 +1150,7 @@ defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>; defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>; defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>; -let SubtargetPredicate = HasMAIInsts in { +let OtherPredicates = [HasMAIInsts] in { defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x58>; defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x59>; @@ -1161,17 +1168,15 @@ defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MFMA <0x50, "v_mfma_i32_32x32x4_2b_i8"> defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MFMA <0x51, "v_mfma_i32_16x16x4_4b_i8">; defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MFMA <0x52, "v_mfma_i32_4x4x4_16b_i8">; -let SubtargetPredicate = isGFX908orGFX90A in { -defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MFMA <0x55>; -defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MFMA <0x54>; -defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MFMA <0x68>; -defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MFMA <0x69>; -defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MFMA <0x6b>; -defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MFMA <0x6c>; -defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MFMA <0x6d>; -} +defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MFMA_vi_gfx90a <0x55>; +defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MFMA_vi_gfx90a <0x54>; +defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x68>; +defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x69>; +defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x6b>; +defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x6c>; +defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x6d>; -} // End SubtargetPredicate = HasMAIInsts +} // End OtherPredicates = [HasMAIInsts] defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx90a <0x63>; defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx90a <0x64>; From e9ff4e4291a2748a87ab53c4e623fdc3a21960c0 Mon Sep 17 00:00:00 2001 From: "Yueh-Ting (eop) Chen" Date: Mon, 30 Oct 2023 21:59:02 +0800 Subject: [PATCH 016/144] [Clang][RISCV] Add vundefine intrinsics for tuple types (#70354) riscv-non-isa/rvv-intrinsic-doc#288 --- clang/include/clang/Basic/riscv_vector.td | 9 + .../non-policy/non-overloaded/vundefined.c | 2245 ++++++++++++++--- 2 files changed, 1959 insertions(+), 295 deletions(-) diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td index c685f3ef6087d8..63316da940594b 100644 --- 
a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -2320,6 +2320,15 @@ let HasMasked = false, HasVL = false, IRName = "" in { }] in { def vundefined : RVVBuiltin<"v", "v", "csilxfd">; def vundefined_u : RVVBuiltin<"Uv", "Uv", "csil">; + + foreach nf = NFList in { + let NF = nf in { + defvar T = "(Tuple:" # nf # ")"; + def : RVVBuiltin; + def : RVVBuiltin; + } + } + } // LMUL truncation diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c index 5f474f8a5f5557..721fde373ab124 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ // RUN: -target-feature +zvfh -disable-O0-optnone \ @@ -7,534 +7,2189 @@ #include -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f16mf4 -// CHECK-RV64-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f16mf4( +// CHECK-RV64-SAME: ) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat16mf4_t test_vundefined_f16mf4() { - return __riscv_vundefined_f16mf4(); -} +vfloat16mf4_t test_vundefined_f16mf4() { return __riscv_vundefined_f16mf4(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f16mf2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f16mf2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat16mf2_t test_vundefined_f16mf2() { - return __riscv_vundefined_f16mf2(); -} +vfloat16mf2_t test_vundefined_f16mf2() { return __riscv_vundefined_f16mf2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f16m1 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f16m1( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat16m1_t test_vundefined_f16m1() { - return __riscv_vundefined_f16m1(); -} +vfloat16m1_t test_vundefined_f16m1() { return __riscv_vundefined_f16m1(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f16m2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f16m2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat16m2_t test_vundefined_f16m2() { - return __riscv_vundefined_f16m2(); -} +vfloat16m2_t test_vundefined_f16m2() { return __riscv_vundefined_f16m2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f16m4 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f16m4( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat16m4_t test_vundefined_f16m4() { - return __riscv_vundefined_f16m4(); -} +vfloat16m4_t test_vundefined_f16m4() { return __riscv_vundefined_f16m4(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f16m8 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local 
@test_vundefined_f16m8( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat16m8_t test_vundefined_f16m8() { - return __riscv_vundefined_f16m8(); -} +vfloat16m8_t test_vundefined_f16m8() { return __riscv_vundefined_f16m8(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f32mf2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f32mf2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat32mf2_t test_vundefined_f32mf2() { - return __riscv_vundefined_f32mf2(); -} +vfloat32mf2_t test_vundefined_f32mf2() { return __riscv_vundefined_f32mf2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f32m1 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f32m1( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat32m1_t test_vundefined_f32m1() { - return __riscv_vundefined_f32m1(); -} +vfloat32m1_t test_vundefined_f32m1() { return __riscv_vundefined_f32m1(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f32m2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f32m2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat32m2_t test_vundefined_f32m2() { - return __riscv_vundefined_f32m2(); -} +vfloat32m2_t test_vundefined_f32m2() { return __riscv_vundefined_f32m2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f32m4 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f32m4( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat32m4_t test_vundefined_f32m4() { - return __riscv_vundefined_f32m4(); -} +vfloat32m4_t test_vundefined_f32m4() { return __riscv_vundefined_f32m4(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f32m8 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f32m8( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat32m8_t test_vundefined_f32m8() { - return __riscv_vundefined_f32m8(); -} +vfloat32m8_t test_vundefined_f32m8() { return __riscv_vundefined_f32m8(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f64m1 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f64m1( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat64m1_t test_vundefined_f64m1() { - return __riscv_vundefined_f64m1(); -} +vfloat64m1_t test_vundefined_f64m1() { return __riscv_vundefined_f64m1(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f64m2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f64m2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat64m2_t test_vundefined_f64m2() { - return __riscv_vundefined_f64m2(); -} +vfloat64m2_t test_vundefined_f64m2() { return __riscv_vundefined_f64m2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f64m4 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f64m4( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat64m4_t test_vundefined_f64m4() { - return __riscv_vundefined_f64m4(); -} +vfloat64m4_t 
test_vundefined_f64m4() { return __riscv_vundefined_f64m4(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_f64m8 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_f64m8( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vfloat64m8_t test_vundefined_f64m8() { - return __riscv_vundefined_f64m8(); -} +vfloat64m8_t test_vundefined_f64m8() { return __riscv_vundefined_f64m8(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8mf8 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8mf8( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint8mf8_t test_vundefined_i8mf8() { - return __riscv_vundefined_i8mf8(); -} +vint8mf8_t test_vundefined_i8mf8() { return __riscv_vundefined_i8mf8(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8mf4 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8mf4( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint8mf4_t test_vundefined_i8mf4() { - return __riscv_vundefined_i8mf4(); -} +vint8mf4_t test_vundefined_i8mf4() { return __riscv_vundefined_i8mf4(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8mf2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8mf2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint8mf2_t test_vundefined_i8mf2() { - return __riscv_vundefined_i8mf2(); -} +vint8mf2_t test_vundefined_i8mf2() { return __riscv_vundefined_i8mf2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8m1 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8m1( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint8m1_t test_vundefined_i8m1() { - return __riscv_vundefined_i8m1(); -} +vint8m1_t test_vundefined_i8m1() { return __riscv_vundefined_i8m1(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8m2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8m2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint8m2_t test_vundefined_i8m2() { - return __riscv_vundefined_i8m2(); -} +vint8m2_t test_vundefined_i8m2() { return __riscv_vundefined_i8m2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8m4 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8m4( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint8m4_t test_vundefined_i8m4() { - return __riscv_vundefined_i8m4(); -} +vint8m4_t test_vundefined_i8m4() { return __riscv_vundefined_i8m4(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8m8 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i8m8( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint8m8_t test_vundefined_i8m8() { - return __riscv_vundefined_i8m8(); -} +vint8m8_t test_vundefined_i8m8() { return __riscv_vundefined_i8m8(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i16mf4 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i16mf4( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: 
entry: // CHECK-RV64-NEXT: ret poison // -vint16mf4_t test_vundefined_i16mf4() { - return __riscv_vundefined_i16mf4(); -} +vint16mf4_t test_vundefined_i16mf4() { return __riscv_vundefined_i16mf4(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i16mf2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i16mf2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint16mf2_t test_vundefined_i16mf2() { - return __riscv_vundefined_i16mf2(); -} +vint16mf2_t test_vundefined_i16mf2() { return __riscv_vundefined_i16mf2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i16m1 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i16m1( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint16m1_t test_vundefined_i16m1() { - return __riscv_vundefined_i16m1(); -} +vint16m1_t test_vundefined_i16m1() { return __riscv_vundefined_i16m1(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i16m2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i16m2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint16m2_t test_vundefined_i16m2() { - return __riscv_vundefined_i16m2(); -} +vint16m2_t test_vundefined_i16m2() { return __riscv_vundefined_i16m2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i16m4 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i16m4( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint16m4_t test_vundefined_i16m4() { - return __riscv_vundefined_i16m4(); -} +vint16m4_t test_vundefined_i16m4() { return __riscv_vundefined_i16m4(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i16m8 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i16m8( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint16m8_t test_vundefined_i16m8() { - return __riscv_vundefined_i16m8(); -} +vint16m8_t test_vundefined_i16m8() { return __riscv_vundefined_i16m8(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i32mf2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i32mf2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint32mf2_t test_vundefined_i32mf2() { - return __riscv_vundefined_i32mf2(); -} +vint32mf2_t test_vundefined_i32mf2() { return __riscv_vundefined_i32mf2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i32m1 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i32m1( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint32m1_t test_vundefined_i32m1() { - return __riscv_vundefined_i32m1(); -} +vint32m1_t test_vundefined_i32m1() { return __riscv_vundefined_i32m1(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i32m2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i32m2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint32m2_t test_vundefined_i32m2() { - return __riscv_vundefined_i32m2(); -} +vint32m2_t test_vundefined_i32m2() { return __riscv_vundefined_i32m2(); } -// CHECK-RV64-LABEL: define dso_local 
@test_vundefined_i32m4 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i32m4( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint32m4_t test_vundefined_i32m4() { - return __riscv_vundefined_i32m4(); -} +vint32m4_t test_vundefined_i32m4() { return __riscv_vundefined_i32m4(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i32m8 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i32m8( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint32m8_t test_vundefined_i32m8() { - return __riscv_vundefined_i32m8(); -} +vint32m8_t test_vundefined_i32m8() { return __riscv_vundefined_i32m8(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i64m1 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i64m1( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint64m1_t test_vundefined_i64m1() { - return __riscv_vundefined_i64m1(); -} +vint64m1_t test_vundefined_i64m1() { return __riscv_vundefined_i64m1(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i64m2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i64m2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint64m2_t test_vundefined_i64m2() { - return __riscv_vundefined_i64m2(); -} +vint64m2_t test_vundefined_i64m2() { return __riscv_vundefined_i64m2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i64m4 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i64m4( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint64m4_t test_vundefined_i64m4() { - return __riscv_vundefined_i64m4(); -} +vint64m4_t test_vundefined_i64m4() { return __riscv_vundefined_i64m4(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_i64m8 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_i64m8( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vint64m8_t test_vundefined_i64m8() { - return __riscv_vundefined_i64m8(); -} +vint64m8_t test_vundefined_i64m8() { return __riscv_vundefined_i64m8(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8mf8 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8mf8( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vuint8mf8_t test_vundefined_u8mf8() { - return __riscv_vundefined_u8mf8(); -} +vuint8mf8_t test_vundefined_u8mf8() { return __riscv_vundefined_u8mf8(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8mf4 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8mf4( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vuint8mf4_t test_vundefined_u8mf4() { - return __riscv_vundefined_u8mf4(); -} +vuint8mf4_t test_vundefined_u8mf4() { return __riscv_vundefined_u8mf4(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8mf2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8mf2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vuint8mf2_t 
test_vundefined_u8mf2() { - return __riscv_vundefined_u8mf2(); -} +vuint8mf2_t test_vundefined_u8mf2() { return __riscv_vundefined_u8mf2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8m1 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8m1( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vuint8m1_t test_vundefined_u8m1() { - return __riscv_vundefined_u8m1(); -} +vuint8m1_t test_vundefined_u8m1() { return __riscv_vundefined_u8m1(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8m2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8m2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vuint8m2_t test_vundefined_u8m2() { - return __riscv_vundefined_u8m2(); -} +vuint8m2_t test_vundefined_u8m2() { return __riscv_vundefined_u8m2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8m4 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8m4( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vuint8m4_t test_vundefined_u8m4() { - return __riscv_vundefined_u8m4(); -} +vuint8m4_t test_vundefined_u8m4() { return __riscv_vundefined_u8m4(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8m8 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_u8m8( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vuint8m8_t test_vundefined_u8m8() { - return __riscv_vundefined_u8m8(); -} +vuint8m8_t test_vundefined_u8m8() { return __riscv_vundefined_u8m8(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_u16mf4 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_u16mf4( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vuint16mf4_t test_vundefined_u16mf4() { - return __riscv_vundefined_u16mf4(); -} +vuint16mf4_t test_vundefined_u16mf4() { return __riscv_vundefined_u16mf4(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_u16mf2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_u16mf2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vuint16mf2_t test_vundefined_u16mf2() { - return __riscv_vundefined_u16mf2(); -} +vuint16mf2_t test_vundefined_u16mf2() { return __riscv_vundefined_u16mf2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_u16m1 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_u16m1( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vuint16m1_t test_vundefined_u16m1() { - return __riscv_vundefined_u16m1(); -} +vuint16m1_t test_vundefined_u16m1() { return __riscv_vundefined_u16m1(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_u16m2 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// CHECK-RV64-LABEL: define dso_local @test_vundefined_u16m2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: ret poison // -vuint16m2_t test_vundefined_u16m2() { - return __riscv_vundefined_u16m2(); -} +vuint16m2_t test_vundefined_u16m2() { return __riscv_vundefined_u16m2(); } -// CHECK-RV64-LABEL: define dso_local @test_vundefined_u16m4 -// CHECK-RV64-SAME: () #[[ATTR0]] { +// 
[Diff elided: the remainder of this auto-generated hunk, flattened and with its
<vscale x N x ty> types stripped in this copy, applies the same mechanical
update to every remaining test_vundefined function (test_vundefined_u16m4
through test_vundefined_u64m8): each "// CHECK-RV64-LABEL:" line gains a
trailing "(" with the ")" moved onto a following "// CHECK-RV64-SAME:" line,
and one-statement test bodies are reflowed onto a single line. It then adds
new tests for the tuple-type intrinsics, from test_vundefined_f16mf4x2 through
test_vundefined_u64m4x2, covering the x2-x8 tuples of the f16/f32/f64,
i8/i16/i32/i64, and u8/u16/u32/u64 element types. Each added test verifies
that the intrinsic lowers to a single "ret { <vscale x N x ty>, ... } poison"
over the corresponding struct of LLVM scalable vectors.]
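For context, and not part of the patch: the __riscv_vundefined_* intrinsics
under test return a value with unspecified contents (hence the "ret ... poison"
checks), and the tuple forms are typically used to seed a tuple before its
fields are filled in. A minimal sketch follows; make_pair is a hypothetical
helper, and the vset/vmv spellings assume the RVV intrinsics specification as
implemented by Clang:

    // Build an i32m1x2 tuple without zero-initializing it first.
    // Compile for a V-enabled target, e.g. -march=rv64gcv.
    #include <riscv_vector.h>

    vint32m1x2_t make_pair(int32_t a, int32_t b, size_t vl) {
      vint32m1x2_t t = __riscv_vundefined_i32m1x2();  // lowers to poison
      t = __riscv_vset_v_i32m1_i32m1x2(t, 0, __riscv_vmv_v_x_i32m1(a, vl));
      t = __riscv_vset_v_i32m1_i32m1x2(t, 1, __riscv_vmv_v_x_i32m1(b, vl));
      return t;
    }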
CHECK-RV64-SAME: ) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: ret { , , , } poison +// +vuint64m1x4_t test_vundefined_u64m1x4() { return __riscv_vundefined_u64m1x4(); } + +// CHECK-RV64-LABEL: define dso_local { , , , , } @test_vundefined_u64m1x5( +// CHECK-RV64-SAME: ) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: ret { , , , , } poison +// +vuint64m1x5_t test_vundefined_u64m1x5() { return __riscv_vundefined_u64m1x5(); } + +// CHECK-RV64-LABEL: define dso_local { , , , , , } @test_vundefined_u64m1x6( +// CHECK-RV64-SAME: ) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: ret { , , , , , } poison +// +vuint64m1x6_t test_vundefined_u64m1x6() { return __riscv_vundefined_u64m1x6(); } + +// CHECK-RV64-LABEL: define dso_local { , , , , , , } @test_vundefined_u64m1x7( +// CHECK-RV64-SAME: ) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: ret { , , , , , , } poison +// +vuint64m1x7_t test_vundefined_u64m1x7() { return __riscv_vundefined_u64m1x7(); } + +// CHECK-RV64-LABEL: define dso_local { , , , , , , , } @test_vundefined_u64m1x8( +// CHECK-RV64-SAME: ) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: ret { , , , , , , , } poison +// +vuint64m1x8_t test_vundefined_u64m1x8() { return __riscv_vundefined_u64m1x8(); } + +// CHECK-RV64-LABEL: define dso_local { , } @test_vundefined_u64m2x2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: ret { , } poison +// +vuint64m2x2_t test_vundefined_u64m2x2() { return __riscv_vundefined_u64m2x2(); } + +// CHECK-RV64-LABEL: define dso_local { , , } @test_vundefined_u64m2x3( +// CHECK-RV64-SAME: ) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: ret { , , } poison +// +vuint64m2x3_t test_vundefined_u64m2x3() { return __riscv_vundefined_u64m2x3(); } + +// CHECK-RV64-LABEL: define dso_local { , , , } @test_vundefined_u64m2x4( +// CHECK-RV64-SAME: ) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: ret { , , , } poison +// +vuint64m2x4_t test_vundefined_u64m2x4() { return __riscv_vundefined_u64m2x4(); } + +// CHECK-RV64-LABEL: define dso_local { , } @test_vundefined_u64m4x2( +// CHECK-RV64-SAME: ) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: ret { , } poison +// +vuint64m4x2_t test_vundefined_u64m4x2() { return __riscv_vundefined_u64m4x2(); } From 43a94cbafae13780f67ec4adac2a48863468671b Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Mon, 30 Oct 2023 13:25:06 +0000 Subject: [PATCH 017/144] [mlir][ArmSME] NFC: Rename tild_id -> tile_id --- mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td index df837ebcf23b30..e369ef203ad39d 100644 --- a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td @@ -86,7 +86,7 @@ class ArmSME_IntrStoreOp : ArmSME_IntrOp, Arguments<(ins Arg:$predicate, Arg:$store_address, - Arg:$tild_id, + Arg:$tile_id, Arg:$tile_slice_index)>; def LLVM_aarch64_sme_st1b_horiz : ArmSME_IntrStoreOp<"st1b.horiz">; From 292f34b0d3cb2a04be5ebb85aaeb838b29f71323 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 30 Oct 2023 15:01:01 +0100 Subject: [PATCH 018/144] [AArch64][GlobalISel] Fix incorrect ABI when tail call not supported (#70215) The check for whether a tail call is supported calls determineAssignments(), which may modify argument flags. 
As such, even though the check fails and a non-tail call will be emitted, it
will have a different (incorrect) ABI. Fix this by operating on a separate
copy of the arguments.

Fixes https://github.com/llvm/llvm-project/issues/70207.
---
 .../AArch64/GISel/AArch64CallLowering.cpp     |  7 +++++--
 .../call-lowering-tail-call-fallback.ll       | 18 +++++++-----------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 2d6cc870f98e77..84057ea8d2214a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -832,9 +832,9 @@ bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
 
 bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
     CallLoweringInfo &Info, MachineFunction &MF,
-    SmallVectorImpl &OutArgs) const {
+    SmallVectorImpl &OrigOutArgs) const {
   // If there are no outgoing arguments, then we are done.
-  if (OutArgs.empty())
+  if (OrigOutArgs.empty())
     return true;
 
   const Function &CallerF = MF.getFunction();
@@ -854,6 +854,9 @@ bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
   AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                               Subtarget, /*IsReturn*/ false);
 
+  // determineAssignments() may modify argument flags, so make a copy.
+  SmallVector OutArgs;
+  append_range(OutArgs, OrigOutArgs);
   if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
     LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
     return false;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-tail-call-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-tail-call-fallback.ll
index fc6eefb4016b66..ebd2beca678105 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-tail-call-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-tail-call-fallback.ll
@@ -3,30 +3,26 @@
 declare void @func(i64, i64, i64, i64, i64, i128, i128)
 
-; FIXME: This is a miscompile.
 ; Make sure the check for whether a tail call is allowed does not affect the
 ; calling convention if it fails.
 ; The first i128 argument should be passed in registers, not on the stack.
 define void @pr70207(i128 %arg1, i128 %arg2) nounwind {
 ; CHECK-LABEL: pr70207:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    mov x8, x2
 ; CHECK-NEXT:    mov x6, x0
-; CHECK-NEXT:    mov x8, x1
-; CHECK-NEXT:    mov x9, x2
-; CHECK-NEXT:    mov x10, x3
+; CHECK-NEXT:    mov x7, x1
+; CHECK-NEXT:    mov x9, x3
 ; CHECK-NEXT:    mov x0, xzr
 ; CHECK-NEXT:    mov x1, xzr
 ; CHECK-NEXT:    mov x2, xzr
 ; CHECK-NEXT:    mov x3, xzr
 ; CHECK-NEXT:    mov x4, xzr
-; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT:    str x8, [sp]
-; CHECK-NEXT:    str x9, [sp, #16]
-; CHECK-NEXT:    str x10, [sp, #32]
+; CHECK-NEXT:    str x8, [sp, #-32]!
+; CHECK-NEXT: stp x9, x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl func -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret tail call void @func(i64 0, i64 0, i64 0, i64 0, i64 0, i128 %arg1, i128 %arg2) ret void From 564e0165abc851dcd7a3485d4e728ce63c3d6466 Mon Sep 17 00:00:00 2001 From: Discookie Date: Mon, 30 Oct 2023 14:03:33 +0000 Subject: [PATCH 019/144] [clang][analyzer] Do not analyze opaque types in CXXDeleteChecker (#70638) While inheritance can only be expressed if the class has a definition, in this case one of the types might be opaque to the analyzer. Fixes a crash encountered while analyzing LLVM. --- clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp index 1a1f5c53029403..eb265f4dde68bc 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp @@ -114,6 +114,9 @@ void DeleteWithNonVirtualDtorChecker::checkTypedDeleteExpr( if (!BaseClass || !DerivedClass) return; + if (!BaseClass->hasDefinition() || !DerivedClass->hasDefinition()) + return; + if (BaseClass->getDestructor()->isVirtual()) return; @@ -148,6 +151,9 @@ void CXXArrayDeleteChecker::checkTypedDeleteExpr( if (!BaseClass || !DerivedClass) return; + if (!BaseClass->hasDefinition() || !DerivedClass->hasDefinition()) + return; + if (DE->getOperatorDelete()->getOverloadedOperator() != OO_Array_Delete) return; From fb08c694a42ac6199cc64ca9accf2c2421366b57 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 30 Oct 2023 15:26:06 +0100 Subject: [PATCH 020/144] [JumpThreading] Add test for #70651 (NFC) --- llvm/test/Transforms/JumpThreading/pr70651.ll | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 llvm/test/Transforms/JumpThreading/pr70651.ll diff --git a/llvm/test/Transforms/JumpThreading/pr70651.ll b/llvm/test/Transforms/JumpThreading/pr70651.ll new file mode 100644 index 00000000000000..a156be541874a6 --- /dev/null +++ b/llvm/test/Transforms/JumpThreading/pr70651.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=jump-threading < %s | FileCheck %s + +; FIXME: This is a miscompile. 
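+; The CHECK lines below record the current, incorrect output: the back-edge
+; branch tests [[V_NONNEG]] alone, i.e. the 'xor i1 %v.nonneg, %overflow'
+; condition from the input has been dropped, so the overflow check no longer
+; influences control flow.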
+define i64 @test(i64 %v) {
+; CHECK-LABEL: define i64 @test(
+; CHECK-SAME: i64 [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V_NONNEG:%.*]] = icmp sgt i64 [[V]], -1
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[SUM:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM_NEXT]] = add i64 [[SUM]], [[V]]
+; CHECK-NEXT:    [[OVERFLOW:%.*]] = icmp ult i64 [[SUM_NEXT]], [[SUM]]
+; CHECK-NEXT:    br i1 [[V_NONNEG]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i64 [[SUM]]
+;
+entry:
+  %v.nonneg = icmp sgt i64 %v, -1
+  br label %for.body
+
+for.body:
+  %sum = phi i64 [ 0, %entry ], [ %sum.next, %for.body ]
+  %sum.next = add i64 %sum, %v
+  %overflow = icmp ult i64 %sum.next, %sum
+  %cmp = xor i1 %v.nonneg, %overflow
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i64 %sum
+}

From af15c46777208a4cb4b276c4974a5b556608a415 Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Mon, 30 Oct 2023 07:11:44 -0700
Subject: [PATCH 021/144] [SLP]Do not crash if number of vector registers does
 not fit the vector type.

Need to check that the number of vector registers returned by TTI is not
zero and not greater than the total number of mask elements before trying
to perform any operations. TTI may still return an invalid number of
registers.
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  8 +--
 .../SLPVectorizer/X86/shuffle-multivector.ll  | 53 +++++++++++++++++++
 2 files changed, 57 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bb4e743c1544a9..b6895c649f838c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7580,8 +7580,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
       auto *MaskVecTy = FixedVectorType::get(E1.Scalars.front()->getType(),
                                              Mask.size());
       unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
-      assert(NumParts > 0 && NumParts < Mask.size() &&
-             "Expected positive number of registers.");
+      if (NumParts == 0 || NumParts >= Mask.size())
+        NumParts = 1;
       unsigned SliceSize = Mask.size() / NumParts;
       const auto *It =
           find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; });
@@ -7598,8 +7598,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
      auto *MaskVecTy = FixedVectorType::get(E1.Scalars.front()->getType(),
                                             Mask.size());
      unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
-      assert(NumParts > 0 && NumParts < Mask.size() &&
-             "Expected positive number of registers.");
+      if (NumParts == 0 || NumParts >= Mask.size())
+        NumParts = 1;
      unsigned SliceSize = Mask.size() / NumParts;
      const auto *It =
          find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; });
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll
new file mode 100644
index 00000000000000..143052a3d9cd07
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-160 | FileCheck %s
+
+define void @test1(i128 %p0, i128 %p1, i128 %p2, i128 %p3, <4 x i128> %vec) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i128> poison,
i128 [[P0:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i128> [[TMP0]], i128 [[P1:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i128> [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i128> poison, i128 [[P2:%.*]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i128> [[TMP4]], i128 [[P3:%.*]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i128> [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[T5:%.*]] = trunc i128 [[P1]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP3]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i128> [[TMP1]], <2 x i128> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i128> [[VEC:%.*]], <4 x i128> [[TMP9]], <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i128> [[TMP10]] to <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = sdiv <4 x i32> [[TMP8]], [[TMP11]] +; CHECK-NEXT: br label [[BB:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP12]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret void +; +entry: + %t1 = trunc i128 %p0 to i32 + %t2 = trunc i128 %p1 to i32 + %t3 = trunc i128 %p2 to i32 + %t4 = trunc i128 %p3 to i32 + %t5 = trunc i128 %p1 to i32 + %t6 = trunc i128 %p0 to i32 + %t7 = trunc i128 %p3 to i32 + %t8 = trunc i128 %p2 to i32 + %m0 = sdiv i32 %t1, %t3 + %m1 = sdiv i32 %t2, %t4 + %m2 = sdiv i32 %t1, %t3 + %m3 = sdiv i32 %t2, %t4 + %e0 = extractelement <4 x i128> %vec, i32 0 + %t9 = trunc i128 %e0 to i32 + %d0 = sdiv i32 %m0, %t9 + %d1 = sdiv i32 %m1, %t6 + %d2 = sdiv i32 %m2, %t7 + %d3 = sdiv i32 %m3, %t8 + br label %bb + +bb: + %phi0 = phi i32 [ %d0, %entry ] + %phi1 = phi i32 [ %d1, %entry ] + %phi2 = phi i32 [ %d2, %entry ] + %phi3 = phi i32 [ %d3, %entry ] + ret void +} From bb352b6ead5b37a51c7a9d3bf7a1b23198f7a330 Mon Sep 17 00:00:00 2001 From: Egor Zhdan Date: Mon, 30 Oct 2023 14:36:44 +0000 Subject: [PATCH 022/144] [APINotes] Upstream APINotesReader This upstreams more of the Clang API Notes functionality that is currently implemented in the Apple fork: https://github.com/apple/llvm-project/tree/next/clang/lib/APINotes --- clang/include/clang/APINotes/APINotesReader.h | 200 ++ clang/include/clang/APINotes/Types.h | 8 +- clang/lib/APINotes/APINotesReader.cpp | 2048 +++++++++++++++++ clang/lib/APINotes/CMakeLists.txt | 3 + 4 files changed, 2252 insertions(+), 7 deletions(-) create mode 100644 clang/include/clang/APINotes/APINotesReader.h create mode 100644 clang/lib/APINotes/APINotesReader.cpp diff --git a/clang/include/clang/APINotes/APINotesReader.h b/clang/include/clang/APINotes/APINotesReader.h new file mode 100644 index 00000000000000..1c5aab09595509 --- /dev/null +++ b/clang/include/clang/APINotes/APINotesReader.h @@ -0,0 +1,200 @@ +//===--- APINotesReader.h - API Notes Reader --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the \c APINotesReader class that reads source API notes +// data providing additional information about source code as a separate input, +// such as the non-nil/nilable annotations for method parameters. 
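+//
+// Typical use (a sketch; the class name in the lookup call is illustrative):
+// create a reader over a compiled API notes buffer with
+// APINotesReader::Create(), then query it, e.g.
+// Reader->lookupObjCClassInfo("SomeClass") returns all versioned records
+// known for that class name.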
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_APINOTES_READER_H +#define LLVM_CLANG_APINOTES_READER_H + +#include "clang/APINotes/Types.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/VersionTuple.h" +#include + +namespace clang { +namespace api_notes { + +/// A class that reads API notes data from a binary file that was written by +/// the \c APINotesWriter. +class APINotesReader { + class Implementation; + std::unique_ptr Implementation; + + APINotesReader(llvm::MemoryBuffer *InputBuffer, + llvm::VersionTuple SwiftVersion, bool &Failed); + +public: + /// Create a new API notes reader from the given member buffer, which + /// contains the contents of a binary API notes file. + /// + /// \returns the new API notes reader, or null if an error occurred. + static std::unique_ptr + Create(std::unique_ptr InputBuffer, + llvm::VersionTuple SwiftVersion); + + ~APINotesReader(); + + APINotesReader(const APINotesReader &) = delete; + APINotesReader &operator=(const APINotesReader &) = delete; + + /// Captures the completed versioned information for a particular part of + /// API notes, including both unversioned API notes and each versioned API + /// note for that particular entity. + template class VersionedInfo { + /// The complete set of results. + llvm::SmallVector, 1> Results; + + /// The index of the result that is the "selected" set based on the desired + /// Swift version, or null if nothing matched. + std::optional Selected; + + public: + /// Form an empty set of versioned information. + VersionedInfo(std::nullopt_t) : Selected(std::nullopt) {} + + /// Form a versioned info set given the desired version and a set of + /// results. + VersionedInfo( + llvm::VersionTuple Version, + llvm::SmallVector, 1> Results); + + /// Retrieve the selected index in the result set. + std::optional getSelected() const { return Selected; } + + /// Return the number of versioned results we know about. + unsigned size() const { return Results.size(); } + + /// Access all versioned results. + const std::pair *begin() const { + assert(!Results.empty()); + return Results.begin(); + } + const std::pair *end() const { + return Results.end(); + } + + /// Access a specific versioned result. + const std::pair &operator[](unsigned index) const { + assert(index < Results.size()); + return Results[index]; + } + }; + + /// Look for the context ID of the given Objective-C class. + /// + /// \param Name The name of the class we're looking for. + /// + /// \returns The ID, if known. + std::optional lookupObjCClassID(llvm::StringRef Name); + + /// Look for information regarding the given Objective-C class. + /// + /// \param Name The name of the class we're looking for. + /// + /// \returns The information about the class, if known. + VersionedInfo lookupObjCClassInfo(llvm::StringRef Name); + + /// Look for the context ID of the given Objective-C protocol. + /// + /// \param Name The name of the protocol we're looking for. + /// + /// \returns The ID of the protocol, if known. + std::optional lookupObjCProtocolID(llvm::StringRef Name); + + /// Look for information regarding the given Objective-C protocol. + /// + /// \param Name The name of the protocol we're looking for. + /// + /// \returns The information about the protocol, if known. + VersionedInfo lookupObjCProtocolInfo(llvm::StringRef Name); + + /// Look for information regarding the given Objective-C property in + /// the given context. 
+ /// + /// \param CtxID The ID that references the context we are looking for. + /// \param Name The name of the property we're looking for. + /// \param IsInstance Whether we are looking for an instance property (vs. + /// a class property). + /// + /// \returns Information about the property, if known. + VersionedInfo + lookupObjCProperty(ContextID CtxID, llvm::StringRef Name, bool IsInstance); + + /// Look for information regarding the given Objective-C method in + /// the given context. + /// + /// \param CtxID The ID that references the context we are looking for. + /// \param Selector The selector naming the method we're looking for. + /// \param IsInstanceMethod Whether we are looking for an instance method. + /// + /// \returns Information about the method, if known. + VersionedInfo lookupObjCMethod(ContextID CtxID, + ObjCSelectorRef Selector, + bool IsInstanceMethod); + + /// Look for information regarding the given global variable. + /// + /// \param Name The name of the global variable. + /// + /// \returns information about the global variable, if known. + VersionedInfo + lookupGlobalVariable(llvm::StringRef Name, + std::optional Ctx = std::nullopt); + + /// Look for information regarding the given global function. + /// + /// \param Name The name of the global function. + /// + /// \returns information about the global function, if known. + VersionedInfo + lookupGlobalFunction(llvm::StringRef Name, + std::optional Ctx = std::nullopt); + + /// Look for information regarding the given enumerator. + /// + /// \param Name The name of the enumerator. + /// + /// \returns information about the enumerator, if known. + VersionedInfo lookupEnumConstant(llvm::StringRef Name); + + /// Look for information regarding the given tag + /// (struct/union/enum/C++ class). + /// + /// \param Name The name of the tag. + /// + /// \returns information about the tag, if known. + VersionedInfo lookupTag(llvm::StringRef Name, + std::optional Ctx = std::nullopt); + + /// Look for information regarding the given typedef. + /// + /// \param Name The name of the typedef. + /// + /// \returns information about the typedef, if known. + VersionedInfo + lookupTypedef(llvm::StringRef Name, + std::optional Ctx = std::nullopt); + + /// Look for the context ID of the given C++ namespace. + /// + /// \param Name The name of the class we're looking for. + /// + /// \returns The ID, if known. + std::optional + lookupNamespaceID(llvm::StringRef Name, + std::optional ParentNamespaceID = std::nullopt); +}; + +} // end namespace api_notes +} // end namespace clang + +#endif // LLVM_CLANG_APINOTES_READER_H diff --git a/clang/include/clang/APINotes/Types.h b/clang/include/clang/APINotes/Types.h index 354458588e3093..b74244bc8f1cbd 100644 --- a/clang/include/clang/APINotes/Types.h +++ b/clang/include/clang/APINotes/Types.h @@ -144,16 +144,10 @@ class CommonTypeInfo : public CommonEntityInfo { return SwiftBridge; } - void setSwiftBridge(const std::optional &SwiftType) { + void setSwiftBridge(std::optional SwiftType) { SwiftBridge = SwiftType; } - void setSwiftBridge(const std::optional &SwiftType) { - SwiftBridge = SwiftType - ? 
std::optional(std::string(*SwiftType)) - : std::nullopt; - } - const std::optional &getNSErrorDomain() const { return NSErrorDomain; } diff --git a/clang/lib/APINotes/APINotesReader.cpp b/clang/lib/APINotes/APINotesReader.cpp new file mode 100644 index 00000000000000..2cbf5fd3bf5030 --- /dev/null +++ b/clang/lib/APINotes/APINotesReader.cpp @@ -0,0 +1,2048 @@ +//===--- APINotesReader.cpp - API Notes Reader ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/APINotes/APINotesReader.h" +#include "APINotesFormat.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Bitstream/BitstreamReader.h" +#include "llvm/Support/DJB.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/OnDiskHashTable.h" + +namespace clang { +namespace api_notes { +using namespace llvm::support; + +namespace { +/// Deserialize a version tuple. +llvm::VersionTuple ReadVersionTuple(const uint8_t *&Data) { + uint8_t NumVersions = (*Data++) & 0x03; + + unsigned Major = + endian::readNext(Data); + if (NumVersions == 0) + return llvm::VersionTuple(Major); + + unsigned Minor = + endian::readNext(Data); + if (NumVersions == 1) + return llvm::VersionTuple(Major, Minor); + + unsigned Subminor = + endian::readNext(Data); + if (NumVersions == 2) + return llvm::VersionTuple(Major, Minor, Subminor); + + unsigned Build = + endian::readNext(Data); + return llvm::VersionTuple(Major, Minor, Subminor, Build); +} + +/// An on-disk hash table whose data is versioned based on the Swift version. +template +class VersionedTableInfo { +public: + using internal_key_type = KeyType; + using external_key_type = KeyType; + using data_type = + llvm::SmallVector, 1>; + using hash_value_type = size_t; + using offset_type = unsigned; + + internal_key_type GetInternalKey(external_key_type Key) { return Key; } + + external_key_type GetExternalKey(internal_key_type Key) { return Key; } + + static bool EqualKey(internal_key_type LHS, internal_key_type RHS) { + return LHS == RHS; + } + + static std::pair ReadKeyDataLength(const uint8_t *&Data) { + unsigned KeyLength = + endian::readNext(Data); + unsigned DataLength = + endian::readNext(Data); + return {KeyLength, DataLength}; + } + + static data_type ReadData(internal_key_type Key, const uint8_t *Data, + unsigned Length) { + unsigned NumElements = + endian::readNext(Data); + data_type Result; + Result.reserve(NumElements); + for (unsigned i = 0; i != NumElements; ++i) { + auto version = ReadVersionTuple(Data); + const auto *DataBefore = Data; + (void)DataBefore; + assert(Data != DataBefore && + "Unversioned data reader didn't move pointer"); + auto UnversionedData = Derived::readUnversioned(Key, Data); + Result.push_back({version, UnversionedData}); + } + return Result; + } +}; + +/// Read serialized CommonEntityInfo. 
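+///
+/// The record begins with a flags byte (bit 0: unavailable in Swift, bit 1:
+/// unavailable everywhere, bits 2-3: whether an explicit SwiftPrivate value
+/// is present and, if so, its value), followed by two length-prefixed
+/// strings: the unavailability message and the Swift name.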
+void ReadCommonEntityInfo(const uint8_t *&Data, CommonEntityInfo &Info) { + uint8_t UnavailableBits = *Data++; + Info.Unavailable = (UnavailableBits >> 1) & 0x01; + Info.UnavailableInSwift = UnavailableBits & 0x01; + if ((UnavailableBits >> 2) & 0x01) + Info.setSwiftPrivate(static_cast((UnavailableBits >> 3) & 0x01)); + + unsigned MsgLength = + endian::readNext(Data); + Info.UnavailableMsg = + std::string(reinterpret_cast(Data), + reinterpret_cast(Data) + MsgLength); + Data += MsgLength; + + unsigned SwiftNameLength = + endian::readNext(Data); + Info.SwiftName = + std::string(reinterpret_cast(Data), + reinterpret_cast(Data) + SwiftNameLength); + Data += SwiftNameLength; +} + +/// Read serialized CommonTypeInfo. +void ReadCommonTypeInfo(const uint8_t *&Data, CommonTypeInfo &Info) { + ReadCommonEntityInfo(Data, Info); + + unsigned SwiftBridgeLength = + endian::readNext(Data); + if (SwiftBridgeLength > 0) { + Info.setSwiftBridge(std::string(reinterpret_cast(Data), + SwiftBridgeLength - 1)); + Data += SwiftBridgeLength - 1; + } + + unsigned ErrorDomainLength = + endian::readNext(Data); + if (ErrorDomainLength > 0) { + Info.setNSErrorDomain(std::optional(std::string( + reinterpret_cast(Data), ErrorDomainLength - 1))); + Data += ErrorDomainLength - 1; + } +} + +/// Used to deserialize the on-disk identifier table. +class IdentifierTableInfo { +public: + using internal_key_type = llvm::StringRef; + using external_key_type = llvm::StringRef; + using data_type = IdentifierID; + using hash_value_type = uint32_t; + using offset_type = unsigned; + + internal_key_type GetInternalKey(external_key_type Key) { return Key; } + + external_key_type GetExternalKey(internal_key_type Key) { return Key; } + + hash_value_type ComputeHash(internal_key_type Key) { + return llvm::hash_value(Key); + } + + static bool EqualKey(internal_key_type LHS, internal_key_type RHS) { + return LHS == RHS; + } + + static std::pair ReadKeyDataLength(const uint8_t *&Data) { + unsigned KeyLength = + endian::readNext(Data); + unsigned DataLength = + endian::readNext(Data); + return {KeyLength, DataLength}; + } + + static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) { + return llvm::StringRef(reinterpret_cast(Data), Length); + } + + static data_type ReadData(internal_key_type key, const uint8_t *Data, + unsigned Length) { + return endian::readNext( + Data); + } +}; + +/// Used to deserialize the on-disk Objective-C class table. 
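+///
+/// Keys are (parent context ID, context kind, name ID) triples; the value is
+/// the stored ID of the corresponding context.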
+class ObjCContextIDTableInfo { +public: + using internal_key_type = ContextTableKey; + using external_key_type = internal_key_type; + using data_type = unsigned; + using hash_value_type = size_t; + using offset_type = unsigned; + + internal_key_type GetInternalKey(external_key_type Key) { return Key; } + + external_key_type GetExternalKey(internal_key_type Key) { return Key; } + + hash_value_type ComputeHash(internal_key_type Key) { + return static_cast(Key.hashValue()); + } + + static bool EqualKey(internal_key_type LHS, internal_key_type RHS) { + return LHS == RHS; + } + + static std::pair ReadKeyDataLength(const uint8_t *&Data) { + unsigned KeyLength = + endian::readNext(Data); + unsigned DataLength = + endian::readNext(Data); + return {KeyLength, DataLength}; + } + + static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) { + auto ParentCtxID = + endian::readNext(Data); + auto ContextKind = + endian::readNext(Data); + auto NameID = + endian::readNext(Data); + return {ParentCtxID, ContextKind, NameID}; + } + + static data_type ReadData(internal_key_type Key, const uint8_t *Data, + unsigned Length) { + return endian::readNext( + Data); + } +}; + +/// Used to deserialize the on-disk Objective-C property table. +class ObjCContextInfoTableInfo + : public VersionedTableInfo { +public: + static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) { + return endian::readNext( + Data); + } + + hash_value_type ComputeHash(internal_key_type Key) { + return static_cast(llvm::hash_value(Key)); + } + + static ObjCContextInfo readUnversioned(internal_key_type Key, + const uint8_t *&Data) { + ObjCContextInfo Info; + ReadCommonTypeInfo(Data, Info); + uint8_t Payload = *Data++; + + if (Payload & 0x01) + Info.setHasDesignatedInits(true); + Payload = Payload >> 1; + + if (Payload & 0x4) + Info.setDefaultNullability(static_cast(Payload & 0x03)); + Payload >>= 3; + + if (Payload & (1 << 1)) + Info.setSwiftObjCMembers(Payload & 1); + Payload >>= 2; + + if (Payload & (1 << 1)) + Info.setSwiftImportAsNonGeneric(Payload & 1); + + return Info; + } +}; + +/// Read serialized VariableInfo. +void ReadVariableInfo(const uint8_t *&Data, VariableInfo &Info) { + ReadCommonEntityInfo(Data, Info); + if (*Data++) { + Info.setNullabilityAudited(static_cast(*Data)); + } + ++Data; + + auto TypeLen = + endian::readNext(Data); + Info.setType(std::string(Data, Data + TypeLen)); + Data += TypeLen; +} + +/// Used to deserialize the on-disk Objective-C property table. +class ObjCPropertyTableInfo + : public VersionedTableInfo, + ObjCPropertyInfo> { +public: + static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) { + auto ClassID = + endian::readNext(Data); + auto NameID = + endian::readNext(Data); + char IsInstance = + endian::readNext(Data); + return {ClassID, NameID, IsInstance}; + } + + hash_value_type ComputeHash(internal_key_type Key) { + return static_cast(llvm::hash_value(Key)); + } + + static ObjCPropertyInfo readUnversioned(internal_key_type Key, + const uint8_t *&Data) { + ObjCPropertyInfo Info; + ReadVariableInfo(Data, Info); + uint8_t Flags = *Data++; + if (Flags & (1 << 0)) + Info.setSwiftImportAsAccessors(Flags & (1 << 1)); + return Info; + } +}; + +/// Read serialized ParamInfo. 
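+///
+/// The common variable information is read first, followed by a payload byte
+/// whose low three bits encode the retain-count convention (zero meaning
+/// "none") and whose next two bits encode whether a noescape flag is present
+/// and, if so, its value.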
+void ReadParamInfo(const uint8_t *&Data, ParamInfo &Info) { + ReadVariableInfo(Data, Info); + + uint8_t Payload = + endian::readNext(Data); + if (auto RawConvention = Payload & 0x7) { + auto Convention = static_cast(RawConvention - 1); + Info.setRetainCountConvention(Convention); + } + Payload >>= 3; + if (Payload & 0x01) + Info.setNoEscape(Payload & 0x02); + Payload >>= 2; + assert(Payload == 0 && "Bad API notes"); +} + +/// Read serialized FunctionInfo. +void ReadFunctionInfo(const uint8_t *&Data, FunctionInfo &Info) { + ReadCommonEntityInfo(Data, Info); + + uint8_t Payload = + endian::readNext(Data); + if (auto RawConvention = Payload & 0x7) { + auto Convention = static_cast(RawConvention - 1); + Info.setRetainCountConvention(Convention); + } + Payload >>= 3; + Info.NullabilityAudited = Payload & 0x1; + Payload >>= 1; + assert(Payload == 0 && "Bad API notes"); + + Info.NumAdjustedNullable = + endian::readNext(Data); + Info.NullabilityPayload = + endian::readNext(Data); + + unsigned NumParams = + endian::readNext(Data); + while (NumParams > 0) { + ParamInfo pi; + ReadParamInfo(Data, pi); + Info.Params.push_back(pi); + --NumParams; + } + + unsigned ResultTypeLen = + endian::readNext(Data); + Info.ResultType = std::string(Data, Data + ResultTypeLen); + Data += ResultTypeLen; +} + +/// Used to deserialize the on-disk Objective-C method table. +class ObjCMethodTableInfo + : public VersionedTableInfo, + ObjCMethodInfo> { +public: + static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) { + auto ClassID = + endian::readNext(Data); + auto SelectorID = + endian::readNext(Data); + auto IsInstance = + endian::readNext(Data); + return {ClassID, SelectorID, IsInstance}; + } + + hash_value_type ComputeHash(internal_key_type Key) { + return static_cast(llvm::hash_value(Key)); + } + + static ObjCMethodInfo readUnversioned(internal_key_type Key, + const uint8_t *&Data) { + ObjCMethodInfo Info; + uint8_t Payload = *Data++; + Info.RequiredInit = Payload & 0x01; + Payload >>= 1; + Info.DesignatedInit = Payload & 0x01; + Payload >>= 1; + + ReadFunctionInfo(Data, Info); + return Info; + } +}; + +/// Used to deserialize the on-disk Objective-C selector table. 
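+///
+/// Keys are stored selectors: a 16-bit piece count followed by the 32-bit
+/// identifier IDs of the selector pieces; the value is the selector ID.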
+class ObjCSelectorTableInfo { +public: + using internal_key_type = StoredObjCSelector; + using external_key_type = internal_key_type; + using data_type = SelectorID; + using hash_value_type = unsigned; + using offset_type = unsigned; + + internal_key_type GetInternalKey(external_key_type Key) { return Key; } + + external_key_type GetExternalKey(internal_key_type Key) { return Key; } + + hash_value_type ComputeHash(internal_key_type Key) { + return llvm::DenseMapInfo::getHashValue(Key); + } + + static bool EqualKey(internal_key_type LHS, internal_key_type RHS) { + return llvm::DenseMapInfo::isEqual(LHS, RHS); + } + + static std::pair ReadKeyDataLength(const uint8_t *&Data) { + unsigned KeyLength = + endian::readNext(Data); + unsigned DataLength = + endian::readNext(Data); + return {KeyLength, DataLength}; + } + + static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) { + internal_key_type Key; + Key.NumPieces = + endian::readNext(Data); + unsigned NumIdents = (Length - sizeof(uint16_t)) / sizeof(uint32_t); + for (unsigned i = 0; i != NumIdents; ++i) { + Key.Identifiers.push_back( + endian::readNext( + Data)); + } + return Key; + } + + static data_type ReadData(internal_key_type Key, const uint8_t *Data, + unsigned Length) { + return endian::readNext( + Data); + } +}; + +/// Used to deserialize the on-disk global variable table. +class GlobalVariableTableInfo + : public VersionedTableInfo { +public: + static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) { + auto CtxID = + endian::readNext(Data); + auto ContextKind = + endian::readNext(Data); + auto NameID = + endian::readNext(Data); + return {CtxID, ContextKind, NameID}; + } + + hash_value_type ComputeHash(internal_key_type Key) { + return static_cast(Key.hashValue()); + } + + static GlobalVariableInfo readUnversioned(internal_key_type Key, + const uint8_t *&Data) { + GlobalVariableInfo Info; + ReadVariableInfo(Data, Info); + return Info; + } +}; + +/// Used to deserialize the on-disk global function table. +class GlobalFunctionTableInfo + : public VersionedTableInfo { +public: + static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) { + auto CtxID = + endian::readNext(Data); + auto ContextKind = + endian::readNext(Data); + auto NameID = + endian::readNext(Data); + return {CtxID, ContextKind, NameID}; + } + + hash_value_type ComputeHash(internal_key_type Key) { + return static_cast(Key.hashValue()); + } + + static GlobalFunctionInfo readUnversioned(internal_key_type Key, + const uint8_t *&Data) { + GlobalFunctionInfo Info; + ReadFunctionInfo(Data, Info); + return Info; + } +}; + +/// Used to deserialize the on-disk enumerator table. +class EnumConstantTableInfo + : public VersionedTableInfo { +public: + static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) { + auto NameID = + endian::readNext(Data); + return NameID; + } + + hash_value_type ComputeHash(internal_key_type Key) { + return static_cast(llvm::hash_value(Key)); + } + + static EnumConstantInfo readUnversioned(internal_key_type Key, + const uint8_t *&Data) { + EnumConstantInfo Info; + ReadCommonEntityInfo(Data, Info); + return Info; + } +}; + +/// Used to deserialize the on-disk tag table. 
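+///
+/// The payload carries an optional flag-enum bit and the enum extensibility,
+/// then the optional SwiftImportAs, SwiftRetainOp, and SwiftReleaseOp
+/// strings, and finally the common type information.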
+class TagTableInfo + : public VersionedTableInfo { +public: + static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) { + auto CtxID = + endian::readNext(Data); + auto ContextKind = + endian::readNext(Data); + auto NameID = + endian::readNext( + Data); + return {CtxID, ContextKind, NameID}; + } + + hash_value_type ComputeHash(internal_key_type Key) { + return static_cast(Key.hashValue()); + } + + static TagInfo readUnversioned(internal_key_type Key, const uint8_t *&Data) { + TagInfo Info; + + uint8_t Payload = *Data++; + if (Payload & 1) + Info.setFlagEnum(Payload & 2); + Payload >>= 2; + if (Payload > 0) + Info.EnumExtensibility = + static_cast((Payload & 0x3) - 1); + + unsigned ImportAsLength = + endian::readNext(Data); + if (ImportAsLength > 0) { + Info.SwiftImportAs = + std::string(reinterpret_cast(Data), ImportAsLength - 1); + Data += ImportAsLength - 1; + } + unsigned RetainOpLength = + endian::readNext(Data); + if (RetainOpLength > 0) { + Info.SwiftRetainOp = + std::string(reinterpret_cast(Data), RetainOpLength - 1); + Data += RetainOpLength - 1; + } + unsigned ReleaseOpLength = + endian::readNext(Data); + if (ReleaseOpLength > 0) { + Info.SwiftReleaseOp = std::string(reinterpret_cast(Data), + ReleaseOpLength - 1); + Data += ReleaseOpLength - 1; + } + + ReadCommonTypeInfo(Data, Info); + return Info; + } +}; + +/// Used to deserialize the on-disk typedef table. +class TypedefTableInfo + : public VersionedTableInfo { +public: + static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) { + auto CtxID = + endian::readNext(Data); + auto ContextKind = + endian::readNext(Data); + auto nameID = + endian::readNext( + Data); + return {CtxID, ContextKind, nameID}; + } + + hash_value_type ComputeHash(internal_key_type Key) { + return static_cast(Key.hashValue()); + } + + static TypedefInfo readUnversioned(internal_key_type Key, + const uint8_t *&Data) { + TypedefInfo Info; + + uint8_t Payload = *Data++; + if (Payload > 0) + Info.SwiftWrapper = static_cast((Payload & 0x3) - 1); + + ReadCommonTypeInfo(Data, Info); + return Info; + } +}; +} // end anonymous namespace + +class APINotesReader::Implementation { +public: + /// The input buffer for the API notes data. + llvm::MemoryBuffer *InputBuffer; + + /// The Swift version to use for filtering. + llvm::VersionTuple SwiftVersion; + + /// The name of the module that we read from the control block. + std::string ModuleName; + + // The size and modification time of the source file from + // which this API notes file was created, if known. + std::optional> SourceFileSizeAndModTime; + + using SerializedIdentifierTable = + llvm::OnDiskIterableChainedHashTable; + + /// The identifier table. + std::unique_ptr IdentifierTable; + + using SerializedObjCContextIDTable = + llvm::OnDiskIterableChainedHashTable; + + /// The Objective-C context ID table. + std::unique_ptr ObjCContextIDTable; + + using SerializedObjCContextInfoTable = + llvm::OnDiskIterableChainedHashTable; + + /// The Objective-C context info table. + std::unique_ptr ObjCContextInfoTable; + + using SerializedObjCPropertyTable = + llvm::OnDiskIterableChainedHashTable; + + /// The Objective-C property table. + std::unique_ptr ObjCPropertyTable; + + using SerializedObjCMethodTable = + llvm::OnDiskIterableChainedHashTable; + + /// The Objective-C method table. + std::unique_ptr ObjCMethodTable; + + using SerializedObjCSelectorTable = + llvm::OnDiskIterableChainedHashTable; + + /// The Objective-C selector table. 
+ std::unique_ptr ObjCSelectorTable; + + using SerializedGlobalVariableTable = + llvm::OnDiskIterableChainedHashTable; + + /// The global variable table. + std::unique_ptr GlobalVariableTable; + + using SerializedGlobalFunctionTable = + llvm::OnDiskIterableChainedHashTable; + + /// The global function table. + std::unique_ptr GlobalFunctionTable; + + using SerializedEnumConstantTable = + llvm::OnDiskIterableChainedHashTable; + + /// The enumerator table. + std::unique_ptr EnumConstantTable; + + using SerializedTagTable = llvm::OnDiskIterableChainedHashTable; + + /// The tag table. + std::unique_ptr TagTable; + + using SerializedTypedefTable = + llvm::OnDiskIterableChainedHashTable; + + /// The typedef table. + std::unique_ptr TypedefTable; + + /// Retrieve the identifier ID for the given string, or an empty + /// optional if the string is unknown. + std::optional getIdentifier(llvm::StringRef Str); + + /// Retrieve the selector ID for the given selector, or an empty + /// optional if the string is unknown. + std::optional getSelector(ObjCSelectorRef Selector); + + bool readControlBlock(llvm::BitstreamCursor &Cursor, + llvm::SmallVectorImpl &Scratch); + bool readIdentifierBlock(llvm::BitstreamCursor &Cursor, + llvm::SmallVectorImpl &Scratch); + bool readObjCContextBlock(llvm::BitstreamCursor &Cursor, + llvm::SmallVectorImpl &Scratch); + bool readObjCPropertyBlock(llvm::BitstreamCursor &Cursor, + llvm::SmallVectorImpl &Scratch); + bool readObjCMethodBlock(llvm::BitstreamCursor &Cursor, + llvm::SmallVectorImpl &Scratch); + bool readObjCSelectorBlock(llvm::BitstreamCursor &Cursor, + llvm::SmallVectorImpl &Scratch); + bool readGlobalVariableBlock(llvm::BitstreamCursor &Cursor, + llvm::SmallVectorImpl &Scratch); + bool readGlobalFunctionBlock(llvm::BitstreamCursor &Cursor, + llvm::SmallVectorImpl &Scratch); + bool readEnumConstantBlock(llvm::BitstreamCursor &Cursor, + llvm::SmallVectorImpl &Scratch); + bool readTagBlock(llvm::BitstreamCursor &Cursor, + llvm::SmallVectorImpl &Scratch); + bool readTypedefBlock(llvm::BitstreamCursor &Cursor, + llvm::SmallVectorImpl &Scratch); +}; + +std::optional +APINotesReader::Implementation::getIdentifier(llvm::StringRef Str) { + if (!IdentifierTable) + return std::nullopt; + + if (Str.empty()) + return IdentifierID(0); + + auto Known = IdentifierTable->find(Str); + if (Known == IdentifierTable->end()) + return std::nullopt; + + return *Known; +} + +std::optional +APINotesReader::Implementation::getSelector(ObjCSelectorRef Selector) { + if (!ObjCSelectorTable || !IdentifierTable) + return std::nullopt; + + // Translate the identifiers. + StoredObjCSelector Key; + for (auto Ident : Selector.Identifiers) { + if (auto IdentID = getIdentifier(Ident)) { + Key.Identifiers.push_back(*IdentID); + } else { + return std::nullopt; + } + } + + auto Known = ObjCSelectorTable->find(Key); + if (Known == ObjCSelectorTable->end()) + return std::nullopt; + + return *Known; +} + +bool APINotesReader::Implementation::readControlBlock( + llvm::BitstreamCursor &Cursor, llvm::SmallVectorImpl &Scratch) { + if (Cursor.EnterSubBlock(CONTROL_BLOCK_ID)) + return true; + + bool SawMetadata = false; + + llvm::Expected MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. 
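+    // consumeError() marks the llvm::Error as handled so that its destructor
+    // does not abort; the failure itself is not propagated to the caller.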
+ consumeError(MaybeNext.takeError()); + return false; + } + llvm::BitstreamEntry Next = MaybeNext.get(); + + while (Next.Kind != llvm::BitstreamEntry::EndBlock) { + if (Next.Kind == llvm::BitstreamEntry::Error) + return true; + + if (Next.Kind == llvm::BitstreamEntry::SubBlock) { + // Unknown metadata sub-block, possibly for use by a future version of the + // API notes format. + if (Cursor.SkipBlock()) + return true; + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + continue; + } + + Scratch.clear(); + llvm::StringRef BlobData; + llvm::Expected MaybeKind = + Cursor.readRecord(Next.ID, Scratch, &BlobData); + if (!MaybeKind) { + // FIXME this drops the error on the floor. + consumeError(MaybeKind.takeError()); + return false; + } + unsigned Kind = MaybeKind.get(); + + switch (Kind) { + case control_block::METADATA: + // Already saw metadata. + if (SawMetadata) + return true; + + if (Scratch[0] != VERSION_MAJOR || Scratch[1] != VERSION_MINOR) + return true; + + SawMetadata = true; + break; + + case control_block::MODULE_NAME: + ModuleName = BlobData.str(); + break; + + case control_block::MODULE_OPTIONS: + break; + + case control_block::SOURCE_FILE: + SourceFileSizeAndModTime = {Scratch[0], Scratch[1]}; + break; + + default: + // Unknown metadata record, possibly for use by a future version of the + // module format. + break; + } + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + } + + return !SawMetadata; +} + +bool APINotesReader::Implementation::readIdentifierBlock( + llvm::BitstreamCursor &Cursor, llvm::SmallVectorImpl &Scratch) { + if (Cursor.EnterSubBlock(IDENTIFIER_BLOCK_ID)) + return true; + + llvm::Expected MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + llvm::BitstreamEntry Next = MaybeNext.get(); + + while (Next.Kind != llvm::BitstreamEntry::EndBlock) { + if (Next.Kind == llvm::BitstreamEntry::Error) + return true; + + if (Next.Kind == llvm::BitstreamEntry::SubBlock) { + // Unknown sub-block, possibly for use by a future version of the + // API notes format. + if (Cursor.SkipBlock()) + return true; + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + continue; + } + + Scratch.clear(); + llvm::StringRef BlobData; + llvm::Expected MaybeKind = + Cursor.readRecord(Next.ID, Scratch, &BlobData); + if (!MaybeKind) { + // FIXME this drops the error on the floor. + consumeError(MaybeKind.takeError()); + return false; + } + unsigned Kind = MaybeKind.get(); + switch (Kind) { + case identifier_block::IDENTIFIER_DATA: { + // Already saw identifier table. + if (IdentifierTable) + return true; + + uint32_t tableOffset; + identifier_block::IdentifierDataLayout::readRecord(Scratch, tableOffset); + auto base = reinterpret_cast(BlobData.data()); + + IdentifierTable.reset(SerializedIdentifierTable::Create( + base + tableOffset, base + sizeof(uint32_t), base)); + break; + } + + default: + // Unknown record, possibly for use by a future version of the + // module format. + break; + } + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. 
+ consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + } + + return false; +} + +bool APINotesReader::Implementation::readObjCContextBlock( + llvm::BitstreamCursor &Cursor, llvm::SmallVectorImpl &Scratch) { + if (Cursor.EnterSubBlock(OBJC_CONTEXT_BLOCK_ID)) + return true; + + llvm::Expected MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + llvm::BitstreamEntry Next = MaybeNext.get(); + + while (Next.Kind != llvm::BitstreamEntry::EndBlock) { + if (Next.Kind == llvm::BitstreamEntry::Error) + return true; + + if (Next.Kind == llvm::BitstreamEntry::SubBlock) { + // Unknown sub-block, possibly for use by a future version of the + // API notes format. + if (Cursor.SkipBlock()) + return true; + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + continue; + } + + Scratch.clear(); + llvm::StringRef BlobData; + llvm::Expected MaybeKind = + Cursor.readRecord(Next.ID, Scratch, &BlobData); + if (!MaybeKind) { + // FIXME this drops the error on the floor. + consumeError(MaybeKind.takeError()); + return false; + } + unsigned Kind = MaybeKind.get(); + switch (Kind) { + case objc_context_block::OBJC_CONTEXT_ID_DATA: { + // Already saw Objective-C context ID table. + if (ObjCContextIDTable) + return true; + + uint32_t tableOffset; + objc_context_block::ObjCContextIDLayout::readRecord(Scratch, tableOffset); + auto base = reinterpret_cast(BlobData.data()); + + ObjCContextIDTable.reset(SerializedObjCContextIDTable::Create( + base + tableOffset, base + sizeof(uint32_t), base)); + break; + } + + case objc_context_block::OBJC_CONTEXT_INFO_DATA: { + // Already saw Objective-C context info table. + if (ObjCContextInfoTable) + return true; + + uint32_t tableOffset; + objc_context_block::ObjCContextInfoLayout::readRecord(Scratch, + tableOffset); + auto base = reinterpret_cast(BlobData.data()); + + ObjCContextInfoTable.reset(SerializedObjCContextInfoTable::Create( + base + tableOffset, base + sizeof(uint32_t), base)); + break; + } + + default: + // Unknown record, possibly for use by a future version of the + // module format. + break; + } + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + } + + return false; +} + +bool APINotesReader::Implementation::readObjCPropertyBlock( + llvm::BitstreamCursor &Cursor, llvm::SmallVectorImpl &Scratch) { + if (Cursor.EnterSubBlock(OBJC_PROPERTY_BLOCK_ID)) + return true; + + llvm::Expected MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + llvm::BitstreamEntry Next = MaybeNext.get(); + + while (Next.Kind != llvm::BitstreamEntry::EndBlock) { + if (Next.Kind == llvm::BitstreamEntry::Error) + return true; + + if (Next.Kind == llvm::BitstreamEntry::SubBlock) { + // Unknown sub-block, possibly for use by a future version of the + // API notes format. + if (Cursor.SkipBlock()) + return true; + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. 
+ consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + continue; + } + + Scratch.clear(); + llvm::StringRef BlobData; + llvm::Expected MaybeKind = + Cursor.readRecord(Next.ID, Scratch, &BlobData); + if (!MaybeKind) { + // FIXME this drops the error on the floor. + consumeError(MaybeKind.takeError()); + return false; + } + unsigned Kind = MaybeKind.get(); + switch (Kind) { + case objc_property_block::OBJC_PROPERTY_DATA: { + // Already saw Objective-C property table. + if (ObjCPropertyTable) + return true; + + uint32_t tableOffset; + objc_property_block::ObjCPropertyDataLayout::readRecord(Scratch, + tableOffset); + auto base = reinterpret_cast(BlobData.data()); + + ObjCPropertyTable.reset(SerializedObjCPropertyTable::Create( + base + tableOffset, base + sizeof(uint32_t), base)); + break; + } + + default: + // Unknown record, possibly for use by a future version of the + // module format. + break; + } + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + } + + return false; +} + +bool APINotesReader::Implementation::readObjCMethodBlock( + llvm::BitstreamCursor &Cursor, llvm::SmallVectorImpl &Scratch) { + if (Cursor.EnterSubBlock(OBJC_METHOD_BLOCK_ID)) + return true; + + llvm::Expected MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + llvm::BitstreamEntry Next = MaybeNext.get(); + while (Next.Kind != llvm::BitstreamEntry::EndBlock) { + if (Next.Kind == llvm::BitstreamEntry::Error) + return true; + + if (Next.Kind == llvm::BitstreamEntry::SubBlock) { + // Unknown sub-block, possibly for use by a future version of the + // API notes format. + if (Cursor.SkipBlock()) + return true; + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + continue; + } + + Scratch.clear(); + llvm::StringRef BlobData; + llvm::Expected MaybeKind = + Cursor.readRecord(Next.ID, Scratch, &BlobData); + if (!MaybeKind) { + // FIXME this drops the error on the floor. + consumeError(MaybeKind.takeError()); + return false; + } + unsigned Kind = MaybeKind.get(); + switch (Kind) { + case objc_method_block::OBJC_METHOD_DATA: { + // Already saw Objective-C method table. + if (ObjCMethodTable) + return true; + + uint32_t tableOffset; + objc_method_block::ObjCMethodDataLayout::readRecord(Scratch, tableOffset); + auto base = reinterpret_cast(BlobData.data()); + + ObjCMethodTable.reset(SerializedObjCMethodTable::Create( + base + tableOffset, base + sizeof(uint32_t), base)); + break; + } + + default: + // Unknown record, possibly for use by a future version of the + // module format. + break; + } + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + } + + return false; +} + +bool APINotesReader::Implementation::readObjCSelectorBlock( + llvm::BitstreamCursor &Cursor, llvm::SmallVectorImpl &Scratch) { + if (Cursor.EnterSubBlock(OBJC_SELECTOR_BLOCK_ID)) + return true; + + llvm::Expected MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. 
+ consumeError(MaybeNext.takeError()); + return false; + } + llvm::BitstreamEntry Next = MaybeNext.get(); + while (Next.Kind != llvm::BitstreamEntry::EndBlock) { + if (Next.Kind == llvm::BitstreamEntry::Error) + return true; + + if (Next.Kind == llvm::BitstreamEntry::SubBlock) { + // Unknown sub-block, possibly for use by a future version of the + // API notes format. + if (Cursor.SkipBlock()) + return true; + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + continue; + } + + Scratch.clear(); + llvm::StringRef BlobData; + llvm::Expected MaybeKind = + Cursor.readRecord(Next.ID, Scratch, &BlobData); + if (!MaybeKind) { + // FIXME this drops the error on the floor. + consumeError(MaybeKind.takeError()); + return false; + } + unsigned Kind = MaybeKind.get(); + switch (Kind) { + case objc_selector_block::OBJC_SELECTOR_DATA: { + // Already saw Objective-C selector table. + if (ObjCSelectorTable) + return true; + + uint32_t tableOffset; + objc_selector_block::ObjCSelectorDataLayout::readRecord(Scratch, + tableOffset); + auto base = reinterpret_cast(BlobData.data()); + + ObjCSelectorTable.reset(SerializedObjCSelectorTable::Create( + base + tableOffset, base + sizeof(uint32_t), base)); + break; + } + + default: + // Unknown record, possibly for use by a future version of the + // module format. + break; + } + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + } + + return false; +} + +bool APINotesReader::Implementation::readGlobalVariableBlock( + llvm::BitstreamCursor &Cursor, llvm::SmallVectorImpl &Scratch) { + if (Cursor.EnterSubBlock(GLOBAL_VARIABLE_BLOCK_ID)) + return true; + + llvm::Expected MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + llvm::BitstreamEntry Next = MaybeNext.get(); + while (Next.Kind != llvm::BitstreamEntry::EndBlock) { + if (Next.Kind == llvm::BitstreamEntry::Error) + return true; + + if (Next.Kind == llvm::BitstreamEntry::SubBlock) { + // Unknown sub-block, possibly for use by a future version of the + // API notes format. + if (Cursor.SkipBlock()) + return true; + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + continue; + } + + Scratch.clear(); + llvm::StringRef BlobData; + llvm::Expected MaybeKind = + Cursor.readRecord(Next.ID, Scratch, &BlobData); + if (!MaybeKind) { + // FIXME this drops the error on the floor. + consumeError(MaybeKind.takeError()); + return false; + } + unsigned Kind = MaybeKind.get(); + switch (Kind) { + case global_variable_block::GLOBAL_VARIABLE_DATA: { + // Already saw global variable table. + if (GlobalVariableTable) + return true; + + uint32_t tableOffset; + global_variable_block::GlobalVariableDataLayout::readRecord(Scratch, + tableOffset); + auto base = reinterpret_cast(BlobData.data()); + + GlobalVariableTable.reset(SerializedGlobalVariableTable::Create( + base + tableOffset, base + sizeof(uint32_t), base)); + break; + } + + default: + // Unknown record, possibly for use by a future version of the + // module format. + break; + } + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. 
+ consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + } + + return false; +} + +bool APINotesReader::Implementation::readGlobalFunctionBlock( + llvm::BitstreamCursor &Cursor, llvm::SmallVectorImpl &Scratch) { + if (Cursor.EnterSubBlock(GLOBAL_FUNCTION_BLOCK_ID)) + return true; + + llvm::Expected MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + llvm::BitstreamEntry Next = MaybeNext.get(); + while (Next.Kind != llvm::BitstreamEntry::EndBlock) { + if (Next.Kind == llvm::BitstreamEntry::Error) + return true; + + if (Next.Kind == llvm::BitstreamEntry::SubBlock) { + // Unknown sub-block, possibly for use by a future version of the + // API notes format. + if (Cursor.SkipBlock()) + return true; + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + continue; + } + + Scratch.clear(); + llvm::StringRef BlobData; + llvm::Expected MaybeKind = + Cursor.readRecord(Next.ID, Scratch, &BlobData); + if (!MaybeKind) { + // FIXME this drops the error on the floor. + consumeError(MaybeKind.takeError()); + return false; + } + unsigned Kind = MaybeKind.get(); + switch (Kind) { + case global_function_block::GLOBAL_FUNCTION_DATA: { + // Already saw global function table. + if (GlobalFunctionTable) + return true; + + uint32_t tableOffset; + global_function_block::GlobalFunctionDataLayout::readRecord(Scratch, + tableOffset); + auto base = reinterpret_cast(BlobData.data()); + + GlobalFunctionTable.reset(SerializedGlobalFunctionTable::Create( + base + tableOffset, base + sizeof(uint32_t), base)); + break; + } + + default: + // Unknown record, possibly for use by a future version of the + // module format. + break; + } + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + } + + return false; +} + +bool APINotesReader::Implementation::readEnumConstantBlock( + llvm::BitstreamCursor &Cursor, llvm::SmallVectorImpl &Scratch) { + if (Cursor.EnterSubBlock(ENUM_CONSTANT_BLOCK_ID)) + return true; + + llvm::Expected MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + llvm::BitstreamEntry Next = MaybeNext.get(); + while (Next.Kind != llvm::BitstreamEntry::EndBlock) { + if (Next.Kind == llvm::BitstreamEntry::Error) + return true; + + if (Next.Kind == llvm::BitstreamEntry::SubBlock) { + // Unknown sub-block, possibly for use by a future version of the + // API notes format. + if (Cursor.SkipBlock()) + return true; + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + continue; + } + + Scratch.clear(); + llvm::StringRef BlobData; + llvm::Expected MaybeKind = + Cursor.readRecord(Next.ID, Scratch, &BlobData); + if (!MaybeKind) { + // FIXME this drops the error on the floor. + consumeError(MaybeKind.takeError()); + return false; + } + unsigned Kind = MaybeKind.get(); + switch (Kind) { + case enum_constant_block::ENUM_CONSTANT_DATA: { + // Already saw enumerator table. 
+ if (EnumConstantTable) + return true; + + uint32_t tableOffset; + enum_constant_block::EnumConstantDataLayout::readRecord(Scratch, + tableOffset); + auto base = reinterpret_cast(BlobData.data()); + + EnumConstantTable.reset(SerializedEnumConstantTable::Create( + base + tableOffset, base + sizeof(uint32_t), base)); + break; + } + + default: + // Unknown record, possibly for use by a future version of the + // module format. + break; + } + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + } + + return false; +} + +bool APINotesReader::Implementation::readTagBlock( + llvm::BitstreamCursor &Cursor, llvm::SmallVectorImpl &Scratch) { + if (Cursor.EnterSubBlock(TAG_BLOCK_ID)) + return true; + + llvm::Expected MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + llvm::BitstreamEntry Next = MaybeNext.get(); + while (Next.Kind != llvm::BitstreamEntry::EndBlock) { + if (Next.Kind == llvm::BitstreamEntry::Error) + return true; + + if (Next.Kind == llvm::BitstreamEntry::SubBlock) { + // Unknown sub-block, possibly for use by a future version of the + // API notes format. + if (Cursor.SkipBlock()) + return true; + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + continue; + } + + Scratch.clear(); + llvm::StringRef BlobData; + llvm::Expected MaybeKind = + Cursor.readRecord(Next.ID, Scratch, &BlobData); + if (!MaybeKind) { + // FIXME this drops the error on the floor. + consumeError(MaybeKind.takeError()); + return false; + } + unsigned Kind = MaybeKind.get(); + switch (Kind) { + case tag_block::TAG_DATA: { + // Already saw tag table. + if (TagTable) + return true; + + uint32_t tableOffset; + tag_block::TagDataLayout::readRecord(Scratch, tableOffset); + auto base = reinterpret_cast(BlobData.data()); + + TagTable.reset(SerializedTagTable::Create(base + tableOffset, + base + sizeof(uint32_t), base)); + break; + } + + default: + // Unknown record, possibly for use by a future version of the + // module format. + break; + } + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + } + + return false; +} + +bool APINotesReader::Implementation::readTypedefBlock( + llvm::BitstreamCursor &Cursor, llvm::SmallVectorImpl &Scratch) { + if (Cursor.EnterSubBlock(TYPEDEF_BLOCK_ID)) + return true; + + llvm::Expected MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + llvm::BitstreamEntry Next = MaybeNext.get(); + while (Next.Kind != llvm::BitstreamEntry::EndBlock) { + if (Next.Kind == llvm::BitstreamEntry::Error) + return true; + + if (Next.Kind == llvm::BitstreamEntry::SubBlock) { + // Unknown sub-block, possibly for use by a future version of the + // API notes format. + if (Cursor.SkipBlock()) + return true; + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. 
+ consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + continue; + } + + Scratch.clear(); + llvm::StringRef BlobData; + llvm::Expected MaybeKind = + Cursor.readRecord(Next.ID, Scratch, &BlobData); + if (!MaybeKind) { + // FIXME this drops the error on the floor. + consumeError(MaybeKind.takeError()); + return false; + } + unsigned Kind = MaybeKind.get(); + switch (Kind) { + case typedef_block::TYPEDEF_DATA: { + // Already saw typedef table. + if (TypedefTable) + return true; + + uint32_t tableOffset; + typedef_block::TypedefDataLayout::readRecord(Scratch, tableOffset); + auto base = reinterpret_cast(BlobData.data()); + + TypedefTable.reset(SerializedTypedefTable::Create( + base + tableOffset, base + sizeof(uint32_t), base)); + break; + } + + default: + // Unknown record, possibly for use by a future version of the + // module format. + break; + } + + MaybeNext = Cursor.advance(); + if (!MaybeNext) { + // FIXME this drops the error on the floor. + consumeError(MaybeNext.takeError()); + return false; + } + Next = MaybeNext.get(); + } + + return false; +} + +APINotesReader::APINotesReader(llvm::MemoryBuffer *InputBuffer, + llvm::VersionTuple SwiftVersion, bool &Failed) + : Implementation(new class Implementation) { + Failed = false; + + // Initialize the input buffer. + Implementation->InputBuffer = InputBuffer; + Implementation->SwiftVersion = SwiftVersion; + llvm::BitstreamCursor Cursor(*Implementation->InputBuffer); + + // Validate signature. + for (auto byte : API_NOTES_SIGNATURE) { + if (Cursor.AtEndOfStream()) { + Failed = true; + return; + } + if (llvm::Expected maybeRead = + Cursor.Read(8)) { + if (maybeRead.get() != byte) { + Failed = true; + return; + } + } else { + // FIXME this drops the error on the floor. + consumeError(maybeRead.takeError()); + Failed = true; + return; + } + } + + // Look at all of the blocks. + bool HasValidControlBlock = false; + llvm::SmallVector Scratch; + while (!Cursor.AtEndOfStream()) { + llvm::Expected MaybeTopLevelEntry = Cursor.advance(); + if (!MaybeTopLevelEntry) { + // FIXME this drops the error on the floor. + consumeError(MaybeTopLevelEntry.takeError()); + Failed = true; + return; + } + llvm::BitstreamEntry TopLevelEntry = MaybeTopLevelEntry.get(); + + if (TopLevelEntry.Kind != llvm::BitstreamEntry::SubBlock) + break; + + switch (TopLevelEntry.ID) { + case llvm::bitc::BLOCKINFO_BLOCK_ID: + if (!Cursor.ReadBlockInfoBlock()) { + Failed = true; + break; + } + break; + + case CONTROL_BLOCK_ID: + // Only allow a single control block. 
+ if (HasValidControlBlock || + Implementation->readControlBlock(Cursor, Scratch)) { + Failed = true; + return; + } + + HasValidControlBlock = true; + break; + + case IDENTIFIER_BLOCK_ID: + if (!HasValidControlBlock || + Implementation->readIdentifierBlock(Cursor, Scratch)) { + Failed = true; + return; + } + break; + + case OBJC_CONTEXT_BLOCK_ID: + if (!HasValidControlBlock || + Implementation->readObjCContextBlock(Cursor, Scratch)) { + Failed = true; + return; + } + + break; + + case OBJC_PROPERTY_BLOCK_ID: + if (!HasValidControlBlock || + Implementation->readObjCPropertyBlock(Cursor, Scratch)) { + Failed = true; + return; + } + break; + + case OBJC_METHOD_BLOCK_ID: + if (!HasValidControlBlock || + Implementation->readObjCMethodBlock(Cursor, Scratch)) { + Failed = true; + return; + } + break; + + case OBJC_SELECTOR_BLOCK_ID: + if (!HasValidControlBlock || + Implementation->readObjCSelectorBlock(Cursor, Scratch)) { + Failed = true; + return; + } + break; + + case GLOBAL_VARIABLE_BLOCK_ID: + if (!HasValidControlBlock || + Implementation->readGlobalVariableBlock(Cursor, Scratch)) { + Failed = true; + return; + } + break; + + case GLOBAL_FUNCTION_BLOCK_ID: + if (!HasValidControlBlock || + Implementation->readGlobalFunctionBlock(Cursor, Scratch)) { + Failed = true; + return; + } + break; + + case ENUM_CONSTANT_BLOCK_ID: + if (!HasValidControlBlock || + Implementation->readEnumConstantBlock(Cursor, Scratch)) { + Failed = true; + return; + } + break; + + case TAG_BLOCK_ID: + if (!HasValidControlBlock || + Implementation->readTagBlock(Cursor, Scratch)) { + Failed = true; + return; + } + break; + + case TYPEDEF_BLOCK_ID: + if (!HasValidControlBlock || + Implementation->readTypedefBlock(Cursor, Scratch)) { + Failed = true; + return; + } + break; + + default: + // Unknown top-level block, possibly for use by a future version of the + // module format. + if (Cursor.SkipBlock()) { + Failed = true; + return; + } + break; + } + } + + if (!Cursor.AtEndOfStream()) { + Failed = true; + return; + } +} + +APINotesReader::~APINotesReader() { delete Implementation->InputBuffer; } + +std::unique_ptr +APINotesReader::Create(std::unique_ptr InputBuffer, + llvm::VersionTuple SwiftVersion) { + bool Failed = false; + std::unique_ptr Reader( + new APINotesReader(InputBuffer.release(), SwiftVersion, Failed)); + if (Failed) + return nullptr; + + return Reader; +} + +template +APINotesReader::VersionedInfo::VersionedInfo( + llvm::VersionTuple Version, + llvm::SmallVector, 1> Results) + : Results(std::move(Results)) { + + assert(!Results.empty()); + assert(std::is_sorted( + Results.begin(), Results.end(), + [](const std::pair &left, + const std::pair &right) -> bool { + assert(left.first != right.first && "two entries for the same version"); + return left.first < right.first; + })); + + Selected = std::nullopt; + for (unsigned i = 0, n = Results.size(); i != n; ++i) { + if (!Version.empty() && Results[i].first >= Version) { + // If the current version is "4", then entries for 4 are better than + // entries for 5, but both are valid. Because entries are sorted, we get + // that behavior by picking the first match. + Selected = i; + break; + } + } + + // If we didn't find a match but we have an unversioned result, use the + // unversioned result. This will always be the first entry because we encode + // it as version 0. 
+ if (!Selected && Results[0].first.empty()) + Selected = 0; +} + +auto APINotesReader::lookupObjCClassID(llvm::StringRef Name) + -> std::optional { + if (!Implementation->ObjCContextIDTable) + return std::nullopt; + + std::optional ClassID = Implementation->getIdentifier(Name); + if (!ClassID) + return std::nullopt; + + // ObjC classes can't be declared in C++ namespaces, so use -1 as the global + // context. + auto KnownID = Implementation->ObjCContextIDTable->find( + ContextTableKey(-1, (uint8_t)ContextKind::ObjCClass, *ClassID)); + if (KnownID == Implementation->ObjCContextIDTable->end()) + return std::nullopt; + + return ContextID(*KnownID); +} + +auto APINotesReader::lookupObjCClassInfo(llvm::StringRef Name) + -> VersionedInfo { + if (!Implementation->ObjCContextInfoTable) + return std::nullopt; + + std::optional CtxID = lookupObjCClassID(Name); + if (!CtxID) + return std::nullopt; + + auto KnownInfo = Implementation->ObjCContextInfoTable->find(CtxID->Value); + if (KnownInfo == Implementation->ObjCContextInfoTable->end()) + return std::nullopt; + + return {Implementation->SwiftVersion, *KnownInfo}; +} + +auto APINotesReader::lookupObjCProtocolID(llvm::StringRef Name) + -> std::optional { + if (!Implementation->ObjCContextIDTable) + return std::nullopt; + + std::optional classID = Implementation->getIdentifier(Name); + if (!classID) + return std::nullopt; + + // ObjC classes can't be declared in C++ namespaces, so use -1 as the global + // context. + auto KnownID = Implementation->ObjCContextIDTable->find( + ContextTableKey(-1, (uint8_t)ContextKind::ObjCProtocol, *classID)); + if (KnownID == Implementation->ObjCContextIDTable->end()) + return std::nullopt; + + return ContextID(*KnownID); +} + +auto APINotesReader::lookupObjCProtocolInfo(llvm::StringRef Name) + -> VersionedInfo { + if (!Implementation->ObjCContextInfoTable) + return std::nullopt; + + std::optional CtxID = lookupObjCProtocolID(Name); + if (!CtxID) + return std::nullopt; + + auto KnownInfo = Implementation->ObjCContextInfoTable->find(CtxID->Value); + if (KnownInfo == Implementation->ObjCContextInfoTable->end()) + return std::nullopt; + + return {Implementation->SwiftVersion, *KnownInfo}; +} + +auto APINotesReader::lookupObjCProperty(ContextID CtxID, llvm::StringRef Name, + bool IsInstance) + -> VersionedInfo { + if (!Implementation->ObjCPropertyTable) + return std::nullopt; + + std::optional PropertyID = Implementation->getIdentifier(Name); + if (!PropertyID) + return std::nullopt; + + auto Known = Implementation->ObjCPropertyTable->find( + std::make_tuple(CtxID.Value, *PropertyID, (char)IsInstance)); + if (Known == Implementation->ObjCPropertyTable->end()) + return std::nullopt; + + return {Implementation->SwiftVersion, *Known}; +} + +auto APINotesReader::lookupObjCMethod(ContextID CtxID, ObjCSelectorRef Selector, + bool IsInstanceMethod) + -> VersionedInfo { + if (!Implementation->ObjCMethodTable) + return std::nullopt; + + std::optional SelID = Implementation->getSelector(Selector); + if (!SelID) + return std::nullopt; + + auto Known = Implementation->ObjCMethodTable->find( + ObjCMethodTableInfo::internal_key_type{CtxID.Value, *SelID, + IsInstanceMethod}); + if (Known == Implementation->ObjCMethodTable->end()) + return std::nullopt; + + return {Implementation->SwiftVersion, *Known}; +} + +auto APINotesReader::lookupGlobalVariable(llvm::StringRef Name, + std::optional Ctx) + -> VersionedInfo { + if (!Implementation->GlobalVariableTable) + return std::nullopt; + + std::optional NameID = 
Implementation->getIdentifier(Name); + if (!NameID) + return std::nullopt; + + ContextTableKey Key(Ctx, *NameID); + + auto Known = Implementation->GlobalVariableTable->find(Key); + if (Known == Implementation->GlobalVariableTable->end()) + return std::nullopt; + + return {Implementation->SwiftVersion, *Known}; +} + +auto APINotesReader::lookupGlobalFunction(llvm::StringRef Name, + std::optional Ctx) + -> VersionedInfo { + if (!Implementation->GlobalFunctionTable) + return std::nullopt; + + std::optional NameID = Implementation->getIdentifier(Name); + if (!NameID) + return std::nullopt; + + ContextTableKey Key(Ctx, *NameID); + + auto Known = Implementation->GlobalFunctionTable->find(Key); + if (Known == Implementation->GlobalFunctionTable->end()) + return std::nullopt; + + return {Implementation->SwiftVersion, *Known}; +} + +auto APINotesReader::lookupEnumConstant(llvm::StringRef Name) + -> VersionedInfo { + if (!Implementation->EnumConstantTable) + return std::nullopt; + + std::optional NameID = Implementation->getIdentifier(Name); + if (!NameID) + return std::nullopt; + + auto Known = Implementation->EnumConstantTable->find(*NameID); + if (Known == Implementation->EnumConstantTable->end()) + return std::nullopt; + + return {Implementation->SwiftVersion, *Known}; +} + +auto APINotesReader::lookupTag(llvm::StringRef Name, std::optional Ctx) + -> VersionedInfo { + if (!Implementation->TagTable) + return std::nullopt; + + std::optional NameID = Implementation->getIdentifier(Name); + if (!NameID) + return std::nullopt; + + ContextTableKey Key(Ctx, *NameID); + + auto Known = Implementation->TagTable->find(Key); + if (Known == Implementation->TagTable->end()) + return std::nullopt; + + return {Implementation->SwiftVersion, *Known}; +} + +auto APINotesReader::lookupTypedef(llvm::StringRef Name, + std::optional Ctx) + -> VersionedInfo { + if (!Implementation->TypedefTable) + return std::nullopt; + + std::optional NameID = Implementation->getIdentifier(Name); + if (!NameID) + return std::nullopt; + + ContextTableKey Key(Ctx, *NameID); + + auto Known = Implementation->TypedefTable->find(Key); + if (Known == Implementation->TypedefTable->end()) + return std::nullopt; + + return {Implementation->SwiftVersion, *Known}; +} + +auto APINotesReader::lookupNamespaceID( + llvm::StringRef Name, std::optional ParentNamespaceID) + -> std::optional { + if (!Implementation->ObjCContextIDTable) + return std::nullopt; + + std::optional NamespaceID = Implementation->getIdentifier(Name); + if (!NamespaceID) + return std::nullopt; + + uint32_t RawParentNamespaceID = + ParentNamespaceID ? 
ParentNamespaceID->Value : -1;
+  auto KnownID = Implementation->ObjCContextIDTable->find(
+      {RawParentNamespaceID, (uint8_t)ContextKind::Namespace, *NamespaceID});
+  if (KnownID == Implementation->ObjCContextIDTable->end())
+    return std::nullopt;
+
+  return ContextID(*KnownID);
+}
+
+} // namespace api_notes
+} // namespace clang
diff --git a/clang/lib/APINotes/CMakeLists.txt b/clang/lib/APINotes/CMakeLists.txt
index c34168876a42e2..dec596ea160c68 100644
--- a/clang/lib/APINotes/CMakeLists.txt
+++ b/clang/lib/APINotes/CMakeLists.txt
@@ -1,6 +1,9 @@
 set(LLVM_LINK_COMPONENTS
+  BitReader
+  BitstreamReader
   Support)
 
 add_clang_library(clangAPINotes
+  APINotesReader.cpp
   APINotesTypes.cpp
   APINotesWriter.cpp
   APINotesYAMLCompiler.cpp

From 4bbb2bc051c751067ea3723978e5fae1acfe6d40 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Mon, 30 Oct 2023 14:41:09 +0000
Subject: [PATCH 023/144] [gn build] Port bb352b6ead5b

---
 llvm/utils/gn/secondary/clang/lib/APINotes/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang/lib/APINotes/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/APINotes/BUILD.gn
index 08c10f0284b8f3..2a367bed33a1a3 100644
--- a/llvm/utils/gn/secondary/clang/lib/APINotes/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/APINotes/BUILD.gn
@@ -6,6 +6,7 @@ static_library("APINotes") {
     "//llvm/lib/Support",
   ]
   sources = [
+    "APINotesReader.cpp",
    "APINotesTypes.cpp",
     "APINotesWriter.cpp",
     "APINotesYAMLCompiler.cpp",

From 6a62707c048e16ce9bad37ed8e3520799139436b Mon Sep 17 00:00:00 2001
From: agozillon
Date: Mon, 30 Oct 2023 16:00:23 +0100
Subject: [PATCH 024/144] [Flang][OpenMP][MLIR] Initial array section mapping
 MLIR -> LLVM-IR lowering utilising omp.bounds (#68689)

This patch adds initial lowering of OpenMP array sections within target
region map clauses from MLIR to LLVM IR.

It initially supports fixed-size contiguous arrays (I don't think OpenMP
supports anything other than contiguous sections, from my reading, but I
could be wrong), before looking toward assumed-size and assumed-shape
arrays. The patch also does not yet handle stride; that is left for
future work.

Although, assumed size works in some fashion (dummy arguments) with some
minor alterations to the OMPEarlyOutliner, so it is possible changes
made in the IsolatedFromAbove series may allow this to work with no
further required patches.

It utilises the generated omp.bounds to calculate the size of the mapped
OpenMP array (both for sectioned and un-sectioned arrays) as well as the
offset to be passed to the kernel argument structure.

Alongside these changes, some refactoring of how map data is handled is
attempted, using a new MapInfoData structure to keep track of information
utilised in the lowering of mapped values.

The initial addition of a more complex createDeviceArgumentAccessor that
utilises capture kinds similarly to (and loosely based on) Clang to
generate different kernel argument accesses is also added.

A similar function for altering how the kernel argument is passed to the
kernel argument structure on the host is also utilised
(createAlteredByCaptureMap), which allows modification of the
pointer/basePointer based on their capture (and bounds information).

It's of note that ByRef is the default for explicit mappings and ByCopy
will be the default for implicit captures, so the former is currently
tested in this patch and the latter is not for the moment.
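To make the bounds arithmetic concrete, here is a minimal standalone
sketch (the names are invented for illustration; this is not the patch's
actual API): the size lowered for a mapped section is the product of
(UB - LB + 1) across all bounds, scaled by the element width in bytes.

  #include <cstdint>
  #include <vector>

  // One inclusive [Lower, Upper] bound per array dimension.
  struct Bounds { int64_t Lower, Upper; };

  // Mirrors the patch's elemCount *= (UB - LB + 1), then scales by the
  // element size; e.g. a 3x3x1 section of i32 yields 3 * 3 * 1 * 4 = 36.
  int64_t sectionSizeInBytes(const std::vector<Bounds> &bounds,
                             int64_t elemSizeInBytes) {
    int64_t elementCount = 1;
    for (const Bounds &b : bounds)
      elementCount *= (b.Upper - b.Lower + 1);
    return elementCount * elemSizeInBytes;
  }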
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 535 +++++++++++++-----
 .../omptarget-array-sectioning-host.mlir      |  56 ++
 ...target-byref-bycopy-generation-device.mlir |  41 ++
 ...mptarget-byref-bycopy-generation-host.mlir |  42 ++
 mlir/test/Target/LLVMIR/omptarget-llvm.mlir   |  61 +-
 .../omptarget-region-parallel-llvm.mlir       |   2 +-
 .../basic-target-region-1D-array-section.f90  |  27 +
 .../basic-target-region-3D-array-section.f90  |  39 ++
 .../fortran/basic-target-region-3D-array.f90  |  45 ++
 .../fortran/basic-target-region-array.f90     |  27 +
 10 files changed, 710 insertions(+), 165 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
 create mode 100644 mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
 create mode 100644 mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
 create mode 100644 openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90
 create mode 100644 openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90
 create mode 100644 openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90
 create mode 100644 openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 875ce11391587e..67875f668d4d3e 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1537,13 +1537,6 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
   return success();
 }
 
-int64_t getSizeInBytes(DataLayout &DL, const Type &type, const Type &eleType) {
-  if (isa<LLVM::LLVMPointerType>(type))
-    return DL.getTypeSize(eleType);
-
-  return 0;
-}
-
 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
   switch (deviceClause) {
@@ -1638,13 +1631,141 @@ getRefPtrIfDeclareTarget(mlir::Value value,
   return nullptr;
 }
 
+// A small helper structure to contain data gathered
+// for map lowering, coalescing it into one area and
+// avoiding extra computations such as searches in the
+// llvm module for lowered mapped variables or checking
+// if something is declare target (and retrieving the
+// value) more than necessary.
+struct MapInfoData : llvm::OpenMPIRBuilder::MapInfosTy {
+  llvm::SmallVector<bool, 4> IsDeclareTarget;
+  llvm::SmallVector<Operation *, 4> MapClause;
+  llvm::SmallVector<llvm::Value *, 4> OriginalValue;
+  // Stripped off array/pointer to get the underlying
+  // element type.
+  llvm::SmallVector<llvm::Type *, 4> BaseType;
+
+  /// Append arrays in \a CurInfo.
+  void append(MapInfoData &CurInfo) {
+    IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
+                           CurInfo.IsDeclareTarget.end());
+    MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
+    OriginalValue.append(CurInfo.OriginalValue.begin(),
+                         CurInfo.OriginalValue.end());
+    BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
+    llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
+  }
+};
+
+uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
+  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
+          arrTy.getElementType()))
+    return getArrayElementSizeInBits(nestedArrTy, dl);
+  return dl.getTypeSizeInBits(arrTy.getElementType());
+}
+
+// This function calculates the size to be offloaded for a specified type,
+// given its associated map clause (which can contain bounds information
+// that affects the total size). The size is calculated based on the
+// underlying element type, e.g. given a 1-D array of ints, we will
+// calculate the size from the integer type * number of elements in the
+// array. This size can be used in other calculations but is ultimately
+// used as an argument to the OpenMP runtime's kernel argument structure,
+// which is generated through the combinedInfo data structures.
+// This function is somewhat equivalent to Clang's getExprTypeSize inside of
+// CGOpenMPRuntime.cpp.
+llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
+                            Operation *clauseOp, llvm::IRBuilderBase &builder,
+                            LLVM::ModuleTranslation &moduleTranslation) {
+  // Utilise getTypeSizeInBits instead of getTypeSize, as getTypeSize gives
+  // the size in byte or bit format inconsistently.
+  uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
+  if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type)) {
+    underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);
+  }
+
+  if (auto memberClause =
+          mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
+    // This calculates the size to transfer based on bounds and the
+    // underlying element type, provided bounds have been specified (Fortran
+    // pointers/allocatables/target and arrays that have sections specified
+    // fall into this as well).
+    if (!memberClause.getBounds().empty()) {
+      llvm::Value *elementCount = builder.getInt64(1);
+      for (auto bounds : memberClause.getBounds()) {
+        if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
+                bounds.getDefiningOp())) {
+          // The size to be mapped, calculated from the map_info's bounds, is
+          // elemCount * (UB - LB + 1); later we multiply by the underlying
+          // element type's byte size to get the full size to be offloaded
+          // based on the bounds.
+          elementCount = builder.CreateMul(
+              elementCount,
+              builder.CreateAdd(
+                  builder.CreateSub(
+                      moduleTranslation.lookupValue(boundOp.getUpperBound()),
+                      moduleTranslation.lookupValue(boundOp.getLowerBound())),
+                  builder.getInt64(1)));
+        }
+      }
+
+      // The size in bytes x number of elements; the sizeInBytes stored is
+      // the underlying type's size, e.g. for a pointer to an i32 it'll be
+      // the i32's size, so we do some on-the-fly runtime math to get the
+      // size in bytes from the extent: (ub - lb + 1) * sizeInBytes. NOTE:
+      // This may need some adjustment for members with more complex types.
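+      // For example, the 3-D sectioning test added below maps
+      // inarray(1:3,1:3,2:2): the bounds give elementCount = 3 * 3 * 1 = 9,
+      // and with 4-byte i32 elements the mapped size is 9 * 4 = 36 bytes.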
+      return builder.CreateMul(elementCount,
+                               builder.getInt64(underlyingTypeSzInBits / 8));
+    }
+  }
+
+  return builder.getInt64(underlyingTypeSzInBits / 8);
+}
+
+void collectMapDataFromMapOperands(MapInfoData &mapData,
+                                   llvm::SmallVectorImpl<Value> &mapOperands,
+                                   LLVM::ModuleTranslation &moduleTranslation,
+                                   DataLayout &dl,
+                                   llvm::IRBuilderBase &builder) {
+  for (mlir::Value mapValue : mapOperands) {
+    assert(mlir::isa<mlir::omp::MapInfoOp>(mapValue.getDefiningOp()) &&
+           "missing map info operation or incorrect map info operation type");
+    if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+            mapValue.getDefiningOp())) {
+      mapData.OriginalValue.push_back(
+          moduleTranslation.lookupValue(mapOp.getVarPtr()));
+      mapData.Pointers.push_back(mapData.OriginalValue.back());
+
+      if (llvm::Value *refPtr = getRefPtrIfDeclareTarget(
+              mapOp.getVarPtr(), moduleTranslation)) { // declare target
+        mapData.IsDeclareTarget.push_back(true);
+        mapData.BasePointers.push_back(refPtr);
+      } else { // regular mapped variable
+        mapData.IsDeclareTarget.push_back(false);
+        mapData.BasePointers.push_back(mapData.OriginalValue.back());
+      }
+
+      mapData.Sizes.push_back(getSizeInBytes(dl, mapOp.getVarType(), mapOp,
+                                             builder, moduleTranslation));
+      mapData.BaseType.push_back(
+          moduleTranslation.convertType(mapOp.getVarType()));
+      mapData.MapClause.push_back(mapOp.getOperation());
+      mapData.Types.push_back(
+          llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value()));
+      mapData.Names.push_back(LLVM::createMappingInformation(
+          mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
+      mapData.DevicePointers.push_back(
+          llvm::OpenMPIRBuilder::DeviceInfoTy::None);
+    }
+  }
+}
+
 // Generate all map related information and fill the combinedInfo.
 static void genMapInfos(llvm::IRBuilderBase &builder,
                         LLVM::ModuleTranslation &moduleTranslation,
-                        DataLayout &DL,
+                        DataLayout &dl,
                         llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
-                        const SmallVector<Value> &mapOperands,
-                        const ArrayAttr &mapTypes,
+                        MapInfoData &mapData,
                         const SmallVector<Value> &devPtrOperands = {},
                         const SmallVector<Value> &devAddrOperands = {},
                         bool isTargetParams = false) {
@@ -1659,58 +1780,39 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
     combinedInfo.Names.clear();
   };
 
-  auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) {
-    index = 0;
-    for (auto basePtr : combinedInfo.BasePointers) {
-      if (basePtr == val)
-        return true;
-      index++;
-    }
-    return false;
-  };
-
-  unsigned index = 0;
-  for (const auto &mapOp : mapOperands) {
-    // Unlike dev_ptr and dev_addr operands these map operands point
-    // to a map entry operation which contains further information
-    // on the variable being mapped and how it should be mapped.
-    auto mapInfoOp =
-        mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
-
-    // TODO: Only LLVMPointerTypes are handled.
-    if (!mapInfoOp.getType().isa<LLVM::LLVMPointerType>())
-      return fail();
-
-    llvm::Value *mapOpValue =
-        moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
-
-    llvm::Value *refPtr =
-        getRefPtrIfDeclareTarget(mapInfoOp.getVarPtr(), moduleTranslation);
-
-    combinedInfo.BasePointers.emplace_back(refPtr ? refPtr : mapOpValue);
-    combinedInfo.Pointers.emplace_back(mapOpValue);
-    combinedInfo.DevicePointers.emplace_back(
-        llvm::OpenMPIRBuilder::DeviceInfoTy::None);
-    combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
-        mapInfoOp.getVarPtr().getLoc(), *ompBuilder));
-
-    auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags(
-        mapTypes[index].cast<IntegerAttr>().getUInt());
-
+  // We operate under the assumption that all vectors that are
+  // required in MapInfoData are of equal lengths (either filled with
+  // default-constructed data or appropriate information), so we can
+  // utilise the size from any component of MapInfoData; if we can't,
+  // something is missing from the initial MapInfoData construction.
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
     // Declare Target Mappings are excluded from being marked as
     // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're marked
     // with OMP_MAP_PTR_AND_OBJ instead.
-    if (refPtr)
+    auto mapFlag = mapData.Types[i];
+    if (mapData.IsDeclareTarget[i])
       mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
     else if (isTargetParams)
      mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
 
+    combinedInfo.BasePointers.emplace_back(mapData.BasePointers[i]);
+    combinedInfo.Pointers.emplace_back(mapData.Pointers[i]);
+    combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[i]);
+    combinedInfo.Names.emplace_back(mapData.Names[i]);
     combinedInfo.Types.emplace_back(mapFlag);
-    combinedInfo.Sizes.emplace_back(builder.getInt64(getSizeInBytes(
-        DL, mapInfoOp.getVarPtr().getType(), mapInfoOp.getVarType())));
-    index++;
+    combinedInfo.Sizes.emplace_back(mapData.Sizes[i]);
   }
 
+  auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) {
+    index = 0;
+    for (llvm::Value *basePtr : combinedInfo.BasePointers) {
+      if (basePtr == val)
+        return true;
+      index++;
+    }
+    return false;
+  };
+
   auto addDevInfos = [&, fail](auto devOperands, auto devOpType) -> void {
     for (const auto &devOp : devOperands) {
       // TODO: Only LLVMPointerTypes are handled.
@@ -1756,19 +1858,6 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
 
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
-  auto getMapTypes = [](mlir::OperandRange mapOperands,
-                        mlir::MLIRContext *ctx) {
-    SmallVector<mlir::Attribute> mapTypes;
-    for (auto mapValue : mapOperands) {
-      if (mapValue.getDefiningOp()) {
-        auto mapOp =
-            mlir::dyn_cast<mlir::omp::MapInfoOp>(mapValue.getDefiningOp());
-        mapTypes.push_back(mapOp.getMapTypeAttr());
-      }
-    }
-    return mlir::ArrayAttr::get(ctx, mapTypes);
-  };
-
   LogicalResult result =
       llvm::TypeSwitch<Operation *, LogicalResult>(op)
           .Case([&](omp::DataOp dataOp) {
@@ -1782,8 +1871,6 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
             deviceID = intAttr.getInt();
 
             mapOperands = dataOp.getMapOperands();
-            mapTypes =
-                getMapTypes(dataOp.getMapOperands(), dataOp->getContext());
             useDevPtrOperands = dataOp.getUseDevicePtr();
             useDevAddrOperands = dataOp.getUseDeviceAddr();
             return success();
@@ -1802,8 +1889,6 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
             deviceID = intAttr.getInt();
             RTLFn = llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
             mapOperands = enterDataOp.getMapOperands();
-            mapTypes = getMapTypes(enterDataOp.getMapOperands(),
-                                   enterDataOp->getContext());
             return success();
           })
           .Case([&](omp::ExitDataOp exitDataOp) {
@@ -1821,8 +1906,6 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
 
             RTLFn = llvm::omp::OMPRTL___tgt_target_data_end_mapper;
             mapOperands = exitDataOp.getMapOperands();
-            mapTypes = getMapTypes(exitDataOp.getMapOperands(),
-                                   exitDataOp->getContext());
             return success();
           })
           .Default([&](Operation *op) {
@@ -1835,17 +1918,20 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
 
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
 
+  MapInfoData mapData;
+  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, DL,
+                                builder);
+
   // Fill up the arrays with all the mapped variables.
   llvm::OpenMPIRBuilder::MapInfosTy combinedInfo;
   auto genMapInfoCB =
       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
     builder.restoreIP(codeGenIP);
-    if (auto DataOp = dyn_cast<omp::DataOp>(op)) {
-      genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapOperands,
-                  mapTypes, useDevPtrOperands, useDevAddrOperands);
+    if (auto dataOp = dyn_cast<omp::DataOp>(op)) {
+      genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData,
+                  useDevPtrOperands, useDevAddrOperands);
     } else {
-      genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapOperands,
-                  mapTypes);
+      genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
     }
     return combinedInfo;
   };
@@ -1997,61 +2083,232 @@ static bool targetOpSupported(Operation &opInst) {
 }
 
 static void
-handleDeclareTargetMapVar(llvm::ArrayRef<Value> mapOperands,
+handleDeclareTargetMapVar(MapInfoData &mapData,
                           LLVM::ModuleTranslation &moduleTranslation,
                           llvm::IRBuilderBase &builder) {
-  for (const mlir::Value &mapOp : mapOperands) {
-    auto mapInfoOp =
-        mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
-    llvm::Value *mapOpValue =
-        moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
-    if (auto *declareTarget = getRefPtrIfDeclareTarget(mapInfoOp.getVarPtr(),
-                                                       moduleTranslation)) {
-      // The user's iterator will get invalidated if we modify an element,
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+    // In the case of declare target mapped variables, the basePointer is
+    // the reference pointer generated by the convertDeclareTargetAttr
+    // method, whereas the kernelValue is the original variable. So for
+    // the device we must replace all uses of this original global variable
+    // (stored in kernelValue) with the reference pointer (stored in
+    // basePointer for declare target mapped variables), as for the device
+    // the data is mapped into this reference pointer and should be loaded
+    // from it; the original variable is discarded. On the host both exist,
+    // and metadata is generated (elsewhere, in the convertDeclareTargetAttr
+    // function) to link the two variables in the runtime; then both the
+    // reference pointer and the pointer are assigned in the kernel argument
+    // structure for the host.
+    if (mapData.IsDeclareTarget[i]) {
+      // The user's iterator will get invalidated if we modify an element,
       // so we populate this vector of uses to alter each user on an individual
       // basis to emit its own load (rather than one load for all).
       llvm::SmallVector<llvm::User *> userVec;
-      for (llvm::User *user : mapOpValue->users())
+      for (llvm::User *user : mapData.OriginalValue[i]->users())
         userVec.push_back(user);
 
       for (llvm::User *user : userVec) {
         if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
-          auto *load = builder.CreateLoad(
-              moduleTranslation.convertType(mapInfoOp.getVarPtr().getType()),
-              declareTarget);
+          auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
+                                          mapData.BasePointers[i]);
           load->moveBefore(insn);
-          user->replaceUsesOfWith(mapOpValue, load);
+          user->replaceUsesOfWith(mapData.OriginalValue[i], load);
         }
       }
     }
   }
 }
 
+// The createDeviceArgumentAccessor function generates
+// instructions for retrieving (accessing) kernel
+// arguments inside of the device kernel for use by
+// the kernel. This enables different semantics such as
+// the creation of temporary copies of data allowing
+// semantics like read-only/no host write-back kernel
+// arguments.
+//
+// This currently implements a very light version of Clang's
+// EmitParmDecl's handling of direct argument handling as well
+// as a portion of the argument access generation based on
+// capture types found at the end of emitOutlinedFunctionPrologue
+// in Clang. The indirect path handling of EmitParmDecl's may be
+// required for future work, but a direct 1-to-1 copy doesn't seem
+// possible as the logic is rather scattered throughout Clang's
+// lowering and perhaps we wish to deviate slightly.
+//
+// \param mapData - A container holding vectors of information
+// corresponding to the input argument, which should have a
+// corresponding entry in the MapInfoData container's
+// OriginalValue vector.
+// \param arg - This is the generated kernel function argument that
+// corresponds to the passed-in input argument. We generate different
+// accesses of this Argument, based on capture type and other input
+// related information.
+// \param input - This is the host-side value that will be passed to
+// the kernel, i.e. the kernel input. We rewrite all uses of this within
+// the kernel (as we generate the kernel body based on the target's region,
+// which maintains references to the original input) to the retVal argument
+// upon exit of this function inside of the OMPIRBuilder. This interlinks
+// the kernel argument to future uses of it in the function, providing
+// appropriate "glue" instructions in between.
+// \param retVal - This is the value that all uses of input inside of the
+// kernel will be rewritten to; the goal of this function is to generate
+// an appropriate location for the kernel argument to be accessed from.
+// For example, ByRef will result in a temporary allocation location, and
+// then a store of the kernel argument into this allocated memory, which
+// will then be loaded from; ByCopy will use the allocated memory
+// directly.
 static llvm::IRBuilderBase::InsertPoint
-createDeviceArgumentAccessor(llvm::Argument &arg, llvm::Value *input,
-                             llvm::Value *&retVal, llvm::IRBuilderBase &builder,
+createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
+                             llvm::Value *input, llvm::Value *&retVal,
+                             llvm::IRBuilderBase &builder,
                              llvm::OpenMPIRBuilder &ompBuilder,
                              LLVM::ModuleTranslation &moduleTranslation,
                              llvm::IRBuilderBase::InsertPoint allocaIP,
                              llvm::IRBuilderBase::InsertPoint codeGenIP) {
   builder.restoreIP(allocaIP);
 
-  llvm::Value *addr =
+  mlir::omp::VariableCaptureKind capture =
+      mlir::omp::VariableCaptureKind::ByRef;
+  llvm::Type *inputType = input->getType();
+
+  // Find the associated MapInfoData entry for the current input.
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
+    if (mapData.OriginalValue[i] == input) {
+      if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+              mapData.MapClause[i])) {
+        capture = mapOp.getMapCaptureType().value_or(
+            mlir::omp::VariableCaptureKind::ByRef);
+      }
+
+      inputType = mapData.BaseType[i];
+      break;
+    }
+
+  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
+  unsigned int defaultAS =
+      ompBuilder.M.getDataLayout().getProgramAddressSpace();
+
+  // Create the alloca for the argument at the current point.
+  llvm::Value *v =
+      builder.CreateAlloca(arg.getType()->isPointerTy()
+                               ? arg.getType()
                               : llvm::Type::getInt64Ty(builder.getContext()),
+                           ompBuilder.M.getDataLayout().getAllocaAddrSpace());
-  llvm::Value *addrAscast =
-      arg.getType()->isPointerTy()
-          ? builder.CreatePointerBitCastOrAddrSpaceCast(addr, input->getType())
-          : addr;
-  builder.CreateStore(&arg, addrAscast);
+  if (allocaAS != defaultAS && arg.getType()->isPointerTy()) {
+    v = builder.CreatePointerBitCastOrAddrSpaceCast(
+        v, arg.getType()->getPointerTo(defaultAS));
+  }
+
+  builder.CreateStore(&arg, v);
+
   builder.restoreIP(codeGenIP);
-  retVal = builder.CreateLoad(arg.getType(), addrAscast);
+
+  switch (capture) {
+  case mlir::omp::VariableCaptureKind::ByCopy: {
+    // The RHS of || aims to ignore conversions like int -> uint, but further
+    // extension of this path must be implemented; for the moment it'll fall
+    // through to the assert.
+    if (inputType->isPointerTy() || v->getType() == inputType->getPointerTo()) {
+      retVal = v;
+      return builder.saveIP();
+    }
+
+    assert(false && "Currently unsupported OMPTargetVarCaptureByCopy Type");
+    break;
+  }
+  case mlir::omp::VariableCaptureKind::ByRef: {
+    retVal = builder.CreateAlignedLoad(
+        v->getType(), v,
+        ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
+    break;
+  }
+  case mlir::omp::VariableCaptureKind::This:
+  case mlir::omp::VariableCaptureKind::VLAType:
+    assert(false && "Currently unsupported capture kind");
+    break;
+  }
+
   return builder.saveIP();
 }
 
+// This is a variation on Clang's GenerateOpenMPCapturedVars, which
+// generates different operation (e.g. load/store) combinations for
+// arguments to the kernel, based on map capture kinds, which are then
+// utilised in the combinedInfo in place of the original Map value.
+static void
+createAlteredByCaptureMap(MapInfoData &mapData,
+                          LLVM::ModuleTranslation &moduleTranslation,
+                          llvm::IRBuilderBase &builder) {
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+    // If it's declare target, skip it; it's handled separately.
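+    // For example, a ByCopy scalar is loaded and passed by value into the
+    // kernel argument structure below, while a ByRef array section keeps
+    // its pointer but has it offset to the section's lower bounds.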
+    if (!mapData.IsDeclareTarget[i]) {
+      mlir::omp::VariableCaptureKind captureKind =
+          mlir::omp::VariableCaptureKind::ByRef;
+
+      if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+              mapData.MapClause[i])) {
+        captureKind = mapOp.getMapCaptureType().value_or(
+            mlir::omp::VariableCaptureKind::ByRef);
+      }
+
+      switch (captureKind) {
+      case mlir::omp::VariableCaptureKind::ByRef: {
+        // Currently handles the array sectioning lower-bound case, but more
+        // logic may be required in the future. Clang invokes EmitLValue,
+        // which has specialised logic for special Clang types such as
+        // user-defined types, so it is possible we will have to extend this
+        // for structures or other complex types. The general idea is that
+        // this function mimics some of the logic from Clang that we require
+        // for kernel argument passing from host -> device.
+        if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+                mapData.MapClause[i])) {
+          if (!mapOp.getBounds().empty() && mapData.BaseType[i]->isArrayTy()) {
+
+            std::vector<llvm::Value *> idx =
+                std::vector<llvm::Value *>{builder.getInt64(0)};
+            for (int i = mapOp.getBounds().size() - 1; i >= 0; --i) {
+              if (auto boundOp =
+                      mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
+                          mapOp.getBounds()[i].getDefiningOp())) {
+                idx.push_back(
+                    moduleTranslation.lookupValue(boundOp.getLowerBound()));
+              }
+            }
+
+            mapData.Pointers[i] = builder.CreateInBoundsGEP(
+                mapData.BaseType[i], mapData.Pointers[i], idx);
+          }
+        }
+      } break;
+      case mlir::omp::VariableCaptureKind::ByCopy: {
+        llvm::Type *type = mapData.BaseType[i];
+        llvm::Value *newV = builder.CreateLoad(type, mapData.Pointers[i]);
+
+        if (!type->isPointerTy()) {
+          auto curInsert = builder.saveIP();
+          builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
+          auto *memTempAlloc =
+              builder.CreateAlloca(builder.getInt8PtrTy(), nullptr, ".casted");
+          builder.restoreIP(curInsert);
+
+          builder.CreateStore(newV, memTempAlloc);
+          newV = builder.CreateLoad(builder.getInt8PtrTy(), memTempAlloc);
+        }
+
+        mapData.Pointers[i] = newV;
+        mapData.BasePointers[i] = newV;
+      } break;
+      case mlir::omp::VariableCaptureKind::This:
+      case mlir::omp::VariableCaptureKind::VLAType:
+        mapData.MapClause[i]->emitOpError("Unhandled capture kind");
+        break;
+      }
+    }
+  }
+}
+
 static LogicalResult
 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
@@ -2062,26 +2319,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   auto targetOp = cast<omp::TargetOp>(opInst);
   auto &targetRegion = targetOp.getRegion();
 
-  // This function filters out kernel data that will not show up as kernel
-  // input arguments to the generated kernel function but will still need
-  // explicitly mapped through supplying information to the OpenMP runtime
-  // (declare target). It also prepares some data used for generating the
-  // kernel and populating the associated OpenMP runtime data structures.
-  auto getKernelArguments =
-      [&](const llvm::SetVector<Value> &operandSet,
-          llvm::SmallVectorImpl<llvm::Value *> &llvmInputs) {
-        for (Value operand : operandSet) {
-          if (!getRefPtrIfDeclareTarget(operand, moduleTranslation))
-            llvmInputs.push_back(moduleTranslation.lookupValue(operand));
-        }
-      };
-
-  llvm::SetVector<Value> operandSet;
-  getUsedValuesDefinedAbove(targetRegion, operandSet);
-
-  llvm::SmallVector<llvm::Value *> inputs;
-  getKernelArguments(operandSet, inputs);
-
   LogicalResult bodyGenStatus = success();
 
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
@@ -2115,31 +2352,32 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
 
-  DataLayout DL = DataLayout(opInst.getParentOfType<ModuleOp>());
-  SmallVector<Value> mapOperands = targetOp.getMapOperands();
-
-  auto getMapTypes = [](mlir::OperandRange mapOperands,
-                        mlir::MLIRContext *ctx) {
-    SmallVector<mlir::Attribute> mapTypes;
-    for (auto mapValue : mapOperands) {
-      if (mapValue.getDefiningOp()) {
-        auto mapOp =
-            mlir::dyn_cast<mlir::omp::MapInfoOp>(mapValue.getDefiningOp());
-        mapTypes.push_back(mapOp.getMapTypeAttr());
-      }
-    }
-    return mlir::ArrayAttr::get(ctx, mapTypes);
-  };
-
-  ArrayAttr mapTypes =
-      getMapTypes(targetOp.getMapOperands(), targetOp->getContext());
+  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
+  llvm::SmallVector<Value> mapOperands = targetOp.getMapOperands();
+  MapInfoData mapData;
+  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, dl,
+                                builder);
+
+  // We wish to modify some of the methods in which kernel arguments are
+  // passed based on their capture type by the target region. This can
+  // involve generating new loads and stores, which changes the
+  // MLIR value to LLVM value mapping; however, we only wish to do this
+  // locally for the current function/target and also avoid altering
+  // ModuleTranslation, so we remap the base pointer or pointer stored
+  // in the map info's corresponding MapInfoData, which is later accessed
+  // by genMapInfos and createTarget to help generate the kernel and the
+  // kernel argument structure. It primarily becomes relevant in cases like
+  // bycopy, or byref range'd arrays. In the default case, we simply
+  // pass the pointer byref as both basePointer and pointer.
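+  // For example, for a mapped section array(2:5) the pointer entry is
+  // advanced to the address of the section's first element while the
+  // basePointer continues to reference the start of the array.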
+  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
+    createAlteredByCaptureMap(mapData, moduleTranslation, builder);
 
   llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
   auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
       -> llvm::OpenMPIRBuilder::MapInfosTy & {
     builder.restoreIP(codeGenIP);
-    genMapInfos(builder, moduleTranslation, DL, combinedInfos, mapOperands,
-                mapTypes, {}, {}, true);
+    genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, {}, {},
+                true);
     return combinedInfos;
   };
 
@@ -2158,22 +2396,27 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
       return codeGenIP;
     }
 
-    return createDeviceArgumentAccessor(arg, input, retVal, builder,
+    return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
                                         *ompBuilder, moduleTranslation,
                                         allocaIP, codeGenIP);
   };
 
+  llvm::SmallVector<llvm::Value *> kernelInput;
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+    // Declare target arguments are not passed to kernels as arguments.
+    if (!mapData.IsDeclareTarget[i])
+      kernelInput.push_back(mapData.OriginalValue[i]);
+  }
+
   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget(
       ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams,
-      defaultValThreads, inputs, genMapInfoCB, bodyCB, argAccessorCB));
+      defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB));
 
   // Remap access operations to declare target reference pointers for the
   // device, essentially generating extra loadop's as necessary
-  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice()) {
-    SmallVector<Value> mapOperands = targetOp.getMapOperands();
-    handleDeclareTargetMapVar(llvm::ArrayRef(mapOperands), moduleTranslation,
-                              builder);
-  }
+  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
+    handleDeclareTargetMapVar(mapData, moduleTranslation, builder);
+
   return bodyGenStatus;
 }
 
diff --git a/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
new file mode 100644
index 00000000000000..056085123480ba
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
@@ -0,0 +1,56 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// This test checks that the offload sizes provided to the OpenMP kernel
+// argument structure are correct when lowering to LLVM-IR from MLIR with
+// 3-D bounds provided for a 3-D array: one array is mapped with its full
+// default size, and the other with a user-specified OpenMP array section.
+// We expect the default-sized array bounds to lower to the full size of
+// the array and the sectioned array to be the size of
+// 3*3*1*element-byte-size (36 bytes in this case).
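+// In element terms: the section covers 3 * 3 * 1 = 9 i32 elements, i.e.
+// 9 * 4 = 36 bytes, while the full array covers 3 * 3 * 3 = 27 elements,
+// i.e. 27 * 4 = 108 bytes, matching the @.offload_sizes constant below.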
+
+module attributes {omp.is_target_device = false} {
+  llvm.func @_3d_target_array_section() {
+    %0 = llvm.mlir.addressof @_QFEinarray : !llvm.ptr
+    %1 = llvm.mlir.addressof @_QFEoutarray : !llvm.ptr
+    %2 = llvm.mlir.constant(1 : index) : i64
+    %3 = llvm.mlir.constant(0 : index) : i64
+    %4 = llvm.mlir.constant(2 : index) : i64
+    %5 = omp.bounds lower_bound(%3 : i64) upper_bound(%4 : i64) stride(%2 : i64) start_idx(%2 : i64)
+    %6 = omp.bounds lower_bound(%2 : i64) upper_bound(%2 : i64) stride(%2 : i64) start_idx(%2 : i64)
+    %7 = omp.map_info var_ptr(%0 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>) map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %6) -> !llvm.ptr {name = "inarray(1:3,1:3,2:2)"}
+    %8 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>) map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %5) -> !llvm.ptr {name = "outarray(1:3,1:3,1:3)"}
+    omp.target map_entries(%7, %8 : !llvm.ptr, !llvm.ptr) {
+      %9 = llvm.mlir.constant(0 : i64) : i64
+      %10 = llvm.mlir.constant(1 : i64) : i64
+      %11 = llvm.getelementptr %0[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>
+      %12 = llvm.load %11 : !llvm.ptr -> i32
+      %13 = llvm.getelementptr %1[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>
+      llvm.store %12, %13 : i32, !llvm.ptr
+      omp.terminator
+    }
+    llvm.return
+  }
+  llvm.mlir.global internal @_QFEinarray() {addr_space = 0 : i32} : !llvm.array<3 x array<3 x array<3 x i32>>> {
+    %0 = llvm.mlir.zero : !llvm.array<3 x array<3 x array<3 x i32>>>
+    llvm.return %0 : !llvm.array<3 x array<3 x array<3 x i32>>>
+  }
+  llvm.mlir.global internal @_QFEoutarray() {addr_space = 0 : i32} : !llvm.array<3 x array<3 x array<3 x i32>>> {
+    %0 = llvm.mlir.zero : !llvm.array<3 x array<3 x array<3 x i32>>>
+    llvm.return %0 : !llvm.array<3 x array<3 x array<3 x i32>>>
+  }
+}
+
+// CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 36, i64 108]
+// CHECK: @.offload_maptypes = private unnamed_addr constant [2 x i64] [i64 35, i64 35]
+// CHECK: @.offload_mapnames = private constant [2 x ptr] [ptr @0, ptr @1]
+
+// CHECK: define void @_3d_target_array_section()
+
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr @_QFEinarray, ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr getelementptr inbounds ([3 x [3 x [3 x i32]]], ptr @_QFEinarray, i64 0, i64 1, i64 0, i64 0), ptr %[[OFFLOADPTRS]], align 8
+
+// CHECK: %[[OFFLOADBASEPTRS2:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr @_QFEoutarray, ptr %[[OFFLOADBASEPTRS2]], align 8
+// CHECK: %[[OFFLOADPTRS2:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr @_QFEoutarray, ptr %[[OFFLOADPTRS2]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
new file mode 100644
index 00000000000000..cf6b7257ac606d
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
@@ -0,0 +1,41 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = true} {
+  llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
+    %0 = llvm.mlir.addressof @_QFEi : !llvm.ptr
+    %1 =
llvm.mlir.addressof @_QFEsp : !llvm.ptr + %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"} + %3 = omp.map_info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "i"} + omp.target map_entries(%2, %3 : !llvm.ptr, !llvm.ptr) { + %4 = llvm.load %0 : !llvm.ptr -> i32 + llvm.store %4, %1 : i32, !llvm.ptr + omp.terminator + } + llvm.return + } + llvm.mlir.global internal @_QFEi() {addr_space = 0 : i32} : i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + llvm.return %0 : i32 + } + llvm.mlir.global internal @_QFEsp() {addr_space = 0 : i32} : i32 { + %0 = llvm.mlir.constant(0 : i32) : i32 + llvm.return %0 : i32 + } +} + +// CHECK: define {{.*}} void @__omp_offloading_{{.*}}_{{.*}}__QQmain_l{{.*}}(ptr %[[ARG_BYREF:.*]], ptr %[[ARG_BYCOPY:.*]]) { + +// CHECK: entry: +// CHECK: %[[ALLOCA_BYREF:.*]] = alloca ptr, align 8 +// CHECK: store ptr %[[ARG_BYREF]], ptr %[[ALLOCA_BYREF]], align 8 +// CHECK: %[[ALLOCA_BYCOPY:.*]] = alloca ptr, align 8 +// CHECK: store ptr %[[ARG_BYCOPY]], ptr %[[ALLOCA_BYCOPY]], align 8 + +// CHECK: user_code.entry: ; preds = %entry +// CHECK: %[[LOAD_BYREF:.*]] = load ptr, ptr %[[ALLOCA_BYREF]], align 8 +// CHECK: br label %omp.target + +// CHECK: omp.target: ; preds = %user_code.entry +// CHECK: %[[VAL_LOAD_BYCOPY:.*]] = load i32, ptr %[[ALLOCA_BYCOPY]], align 4 +// CHECK: store i32 %[[VAL_LOAD_BYCOPY]], ptr %[[LOAD_BYREF]], align 4 +// CHECK: br label %omp.region.cont diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir new file mode 100644 index 00000000000000..ca5dad8b4fc9a8 --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir @@ -0,0 +1,42 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +module attributes {omp.is_target_device = false} { + llvm.func @_QQmain() attributes {fir.bindc_name = "main"} { + %0 = llvm.mlir.addressof @_QFEi : !llvm.ptr + %1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr + %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"} + %3 = omp.map_info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "i"} + omp.target map_entries(%2, %3 : !llvm.ptr, !llvm.ptr) { + %4 = llvm.load %0 : !llvm.ptr -> i32 + llvm.store %4, %1 : i32, !llvm.ptr + omp.terminator + } + llvm.return + } + llvm.mlir.global internal @_QFEi() {addr_space = 0 : i32} : i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + llvm.return %0 : i32 + } + llvm.mlir.global internal @_QFEsp() {addr_space = 0 : i32} : i32 { + %0 = llvm.mlir.constant(0 : i32) : i32 + llvm.return %0 : i32 + } +} + +// CHECK: define void @_QQmain() { +// CHECK: %[[BYCOPY_ALLOCA:.*]] = alloca ptr, align 8 + +// CHECK: entry: ; preds = %0 +// CHECK: %[[LOAD_VAL:.*]] = load i32, ptr @_QFEi, align 4 +// CHECK: store i32 %[[LOAD_VAL]], ptr %[[BYCOPY_ALLOCA]], align 4 +// CHECK: %[[BYCOPY_LOAD:.*]] = load ptr, ptr %[[BYCOPY_ALLOCA]], align 8 + +// CHECK: %[[BASEPTR_BYREF:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 +// CHECK: store ptr @_QFEsp, ptr %[[BASEPTR_BYREF]], align 8 +// CHECK: %[[OFFLOADPTR_BYREF:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0 +// CHECK: store ptr @_QFEsp, ptr %[[OFFLOADPTR_BYREF]], align 8 + +// CHECK: %[[BASEPTR_BYCOPY:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 +// CHECK: store ptr 
%[[BYCOPY_LOAD]], ptr %[[BASEPTR_BYCOPY]], align 8 +// CHECK: %[[OFFLOADPTR_BYREF:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1 +// CHECK: store ptr %[[BYCOPY_LOAD]], ptr %[[OFFLOADPTR_BYREF]], align 8 diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir index f2431ec87933f9..9221b410d766ed 100644 --- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir @@ -38,15 +38,20 @@ llvm.func @_QPopenmp_target_data() { // ----- -llvm.func @_QPopenmp_target_data_region(%1 : !llvm.ptr) { - %2 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%2 : !llvm.ptr) { - %3 = llvm.mlir.constant(99 : i32) : i32 - %4 = llvm.mlir.constant(1 : i64) : i64 - %5 = llvm.mlir.constant(1 : i64) : i64 - %6 = llvm.mlir.constant(0 : i64) : i64 - %7 = llvm.getelementptr %1[0, %6] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1024 x i32> - llvm.store %3, %7 : i32, !llvm.ptr +llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) { + %1 = llvm.mlir.constant(1023 : index) : i64 + %2 = llvm.mlir.constant(0 : index) : i64 + %3 = llvm.mlir.constant(1024 : index) : i64 + %4 = llvm.mlir.constant(1 : index) : i64 + %5 = omp.bounds lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%3 : i64) stride(%4 : i64) start_idx(%4 : i64) + %6 = omp.map_info var_ptr(%0 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%5) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%6 : !llvm.ptr) { + %7 = llvm.mlir.constant(99 : i32) : i32 + %8 = llvm.mlir.constant(1 : i64) : i64 + %9 = llvm.mlir.constant(1 : i64) : i64 + %10 = llvm.mlir.constant(0 : i64) : i64 + %11 = llvm.getelementptr %0[0, %10] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1024 x i32> + llvm.store %7, %11 : i32, !llvm.ptr omp.terminator } llvm.return @@ -92,16 +97,36 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) { %11 = llvm.mlir.constant(10 : i32) : i32 %12 = llvm.icmp "slt" %10, %11 : i32 %13 = llvm.load %5 : !llvm.ptr -> i32 - %map1 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} - %map2 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""} + %14 = llvm.mlir.constant(1023 : index) : i64 + %15 = llvm.mlir.constant(0 : index) : i64 + %16 = llvm.mlir.constant(1024 : index) : i64 + %17 = llvm.mlir.constant(1 : index) : i64 + %18 = omp.bounds lower_bound(%15 : i64) upper_bound(%14 : i64) extent(%16 : i64) stride(%17 : i64) start_idx(%17 : i64) + %map1 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(to) capture(ByRef) bounds(%18) -> !llvm.ptr {name = ""} + %19 = llvm.mlir.constant(511 : index) : i64 + %20 = llvm.mlir.constant(0 : index) : i64 + %21 = llvm.mlir.constant(512 : index) : i64 + %22 = llvm.mlir.constant(1 : index) : i64 + %23 = omp.bounds lower_bound(%20 : i64) upper_bound(%19 : i64) extent(%21 : i64) stride(%22 : i64) start_idx(%22 : i64) + %map2 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%23) -> !llvm.ptr {name = ""} omp.target_enter_data if(%12 : i1) device(%13 : i32) map_entries(%map1, %map2 : !llvm.ptr, !llvm.ptr) - %14 = llvm.load %7 : !llvm.ptr -> i32 - %15 = llvm.mlir.constant(10 : i32) : i32 - %16 = llvm.icmp "sgt" %14, %15 : 
i32 - %17 = llvm.load %5 : !llvm.ptr -> i32 - %map3 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - %map4 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_exit_data if(%16 : i1) device(%17 : i32) map_entries(%map3, %map4 : !llvm.ptr, !llvm.ptr) + %24 = llvm.load %7 : !llvm.ptr -> i32 + %25 = llvm.mlir.constant(10 : i32) : i32 + %26 = llvm.icmp "sgt" %24, %25 : i32 + %27 = llvm.load %5 : !llvm.ptr -> i32 + %28 = llvm.mlir.constant(1023 : index) : i64 + %29 = llvm.mlir.constant(0 : index) : i64 + %30 = llvm.mlir.constant(1024 : index) : i64 + %31 = llvm.mlir.constant(1 : index) : i64 + %32 = omp.bounds lower_bound(%29 : i64) upper_bound(%28 : i64) extent(%30 : i64) stride(%31 : i64) start_idx(%31 : i64) + %map3 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%32) -> !llvm.ptr {name = ""} + %33 = llvm.mlir.constant(511 : index) : i64 + %34 = llvm.mlir.constant(0 : index) : i64 + %35 = llvm.mlir.constant(512 : index) : i64 + %36 = llvm.mlir.constant(1 : index) : i64 + %37 = omp.bounds lower_bound(%34 : i64) upper_bound(%33 : i64) extent(%35 : i64) stride(%36 : i64) start_idx(%36 : i64) + %map4 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%37) -> !llvm.ptr {name = ""} + omp.target_exit_data if(%26 : i1) device(%27 : i32) map_entries(%map3, %map4 : !llvm.ptr, !llvm.ptr) llvm.return } diff --git a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir index 20ad6d30c2f52e..1d8799ecd446f0 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir @@ -38,7 +38,7 @@ module attributes {omp.is_target_device = false} { // CHECK: store ptr %[[ADDR_B]], ptr %[[GEP2]], align 8 // CHECK: %[[GEP3:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 2 // CHECK: store ptr %[[ADDR_C]], ptr %[[GEP3]], align 8 -// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]]..omp_par, ptr %[[STRUCTARG]]) +// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @{{.*}}, i32 1, ptr @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]]..omp_par, ptr %[[STRUCTARG]]) // CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]]..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %[[STRUCTARG2:.*]]) #0 { diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90 new file mode 100644 index 00000000000000..11d3b6936bcea2 --- /dev/null +++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90 @@ -0,0 +1,27 @@ +! Basic offloading test of arrays with provided lower +! and upper bounds as specified by OpenMP's sectioning +! REQUIRES: flang, amdgcn-amd-amdhsa +! UNSUPPORTED: nvptx64-nvidia-cuda +! UNSUPPORTED: nvptx64-nvidia-cuda-LTO +! UNSUPPORTED: aarch64-unknown-linux-gnu +! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +! UNSUPPORTED: x86_64-pc-linux-gnu +! UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +! 
RUN: %libomptarget-compile-fortran-run-and-check-generic +program main + implicit none + integer :: write_arr(10) = (/0,0,0,0,0,0,0,0,0,0/) + integer :: read_arr(10) = (/1,2,3,4,5,6,7,8,9,10/) + integer :: i = 2 + + !$omp target map(to:read_arr(2:5)) map(from:write_arr(2:5)) map(tofrom:i) + do i = 2, 5 + write_arr(i) = read_arr(i) + end do + !$omp end target + + print *, write_arr(:) +end program + +! CHECK: 0 2 3 4 5 0 0 0 0 0 diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90 new file mode 100644 index 00000000000000..28b2afced4d1bc --- /dev/null +++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90 @@ -0,0 +1,39 @@ +! Basic offloading test of a regular array explicitly +! passed within a target region +! REQUIRES: flang, amdgcn-amd-amdhsa +! UNSUPPORTED: nvptx64-nvidia-cuda +! UNSUPPORTED: nvptx64-nvidia-cuda-LTO +! UNSUPPORTED: aarch64-unknown-linux-gnu +! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +! UNSUPPORTED: x86_64-pc-linux-gnu +! UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +program main + implicit none + integer :: inArray(3,3,3) + integer :: outArray(3,3,3) + integer :: i, j, k + + do i = 1, 3 + do j = 1, 3 + do k = 1, 3 + inArray(i, j, k) = 42 + outArray(i, j, k) = 0 + end do + end do + end do + +!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3), j, k) + do j = 1, 3 + do k = 1, 3 + outArray(k, j, 2) = inArray(k, j, 2) + end do + end do +!$omp end target + + print *, outArray + +end program + +! CHECK: 0 0 0 0 0 0 0 0 0 42 42 42 42 42 42 42 42 42 0 0 0 0 0 0 0 0 0 diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90 new file mode 100644 index 00000000000000..58f42138ad0aff --- /dev/null +++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90 @@ -0,0 +1,45 @@ +! Basic offloading test of a regular array explicitly +! passed within a target region +! REQUIRES: flang, amdgcn-amd-amdhsa +! UNSUPPORTED: nvptx64-nvidia-cuda +! UNSUPPORTED: nvptx64-nvidia-cuda-LTO +! UNSUPPORTED: aarch64-unknown-linux-gnu +! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +! UNSUPPORTED: x86_64-pc-linux-gnu +! UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +program main + implicit none + integer :: x(2,2,2) + integer :: i = 1, j = 1, k = 1 + integer :: counter = 1 + do i = 1, 2 + do j = 1, 2 + do k = 1, 2 + x(i, j, k) = 0 + end do + end do + end do + +!$omp target map(tofrom:x, i, j, k, counter) + do i = 1, 2 + do j = 1, 2 + do k = 1, 2 + x(i, j, k) = counter + counter = counter + 1 + end do + end do + end do +!$omp end target + + do i = 1, 2 + do j = 1, 2 + do k = 1, 2 + print *, x(i, j, k) + end do + end do + end do +end program main + +! CHECK: 1 2 3 4 5 6 7 8 diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90 new file mode 100644 index 00000000000000..d3c799ff3334f4 --- /dev/null +++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90 @@ -0,0 +1,27 @@ +! Basic offloading test of a regular array explicitly +! passed within a target region +! REQUIRES: flang, amdgcn-amd-amdhsa +! UNSUPPORTED: nvptx64-nvidia-cuda +! 
UNSUPPORTED: nvptx64-nvidia-cuda-LTO +! UNSUPPORTED: aarch64-unknown-linux-gnu +! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +! UNSUPPORTED: x86_64-pc-linux-gnu +! UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +program main + integer :: x(10) = (/0,0,0,0,0,0,0,0,0,0/) + integer :: i = 1 + integer :: j = 11 + + !$omp target map(tofrom:x, i, j) + do while (i <= j) + x(i) = i; + i = i + 1 + end do + !$omp end target + + PRINT *, x(:) +end program main + +! CHECK: 1 2 3 4 5 6 7 8 9 10 From 273ceb1337cdccb730a6930b50544c0ee7d7cd7e Mon Sep 17 00:00:00 2001 From: cor3ntin Date: Mon, 30 Oct 2023 16:05:53 +0100 Subject: [PATCH 025/144] [Clang] Diagnose defaulted assignment operator with incompatible object parameter (#70176) Per https://eel.is/c++draft/dcl.fct.def.default#2.2, the explicit object parameter of a defaulted special member function must be of the same type as the one of an equivalent implicitly defaulted function, ignoring references. Fixes #69233 --- .../clang/Basic/DiagnosticSemaKinds.td | 3 ++ clang/lib/Sema/SemaDeclCXX.cpp | 18 ++++++++ clang/test/SemaCXX/cxx2b-deducing-this.cpp | 41 +++++++++++++++++++ 3 files changed, 62 insertions(+) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 453bd8a9a34042..224c0df7f1fb71 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9483,6 +9483,9 @@ def err_defaulted_special_member_return_type : Error< def err_defaulted_special_member_quals : Error< "an explicitly-defaulted %select{copy|move}0 assignment operator may not " "have 'const'%select{, 'constexpr'|}1 or 'volatile' qualifiers">; +def err_defaulted_special_member_explicit_object_mismatch : Error< + "the type of the explicit object parameter of an explicitly-defaulted " + "%select{copy|move}0 assignment operator should match the type of the class %1">; def err_defaulted_special_member_volatile_param : Error< "the parameter for an explicitly-defaulted %sub{select_special_member_kind}0 " "may not be volatile">; diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index a3f68d4ffc0f6e..beb7e5b177c6e9 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -7748,6 +7748,24 @@ bool Sema::CheckExplicitlyDefaultedSpecialMember(CXXMethodDecl *MD, HadError = true; } } + // [C++23][dcl.fct.def.default]/p2.2 + // if F2 has an implicit object parameter of type “reference to C”, + // F1 may be an explicit object member function whose explicit object + // parameter is of (possibly different) type “reference to C”, + // in which case the type of F1 would differ from the type of F2 + // in that the type of F1 has an additional parameter; + if (!Context.hasSameType( + ThisType.getNonReferenceType().getUnqualifiedType(), + Context.getRecordType(RD))) { + if (DeleteOnTypeMismatch) + ShouldDeleteForTypeMismatch = true; + else { + Diag(MD->getLocation(), + diag::err_defaulted_special_member_explicit_object_mismatch) + << (CSM == CXXMoveAssignment) << RD << MD->getSourceRange(); + HadError = true; + } + } } // Check for parameter type matching. 
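
A minimal self-contained illustration of the rule being enforced (C++23, compile
with -std=c++2b). This sketch is not part of the patch; the types Good and Bad
are invented here and mirror the GH69233 cases added to the test below:

  struct Good {
    // OK: the explicit object parameter is a reference to the class itself,
    // so the operator differs from the implicitly defaulted one only by
    // having an additional (explicit object) parameter.
    Good &operator=(this Good &self, const Good &) = default;
  };

  struct Bad {
    Bad &operator=(this int &&self, const Bad &);
    operator int();
  };

  // Now an error: the type of the explicit object parameter of an
  // explicitly-defaulted copy assignment operator should match the type of
  // the class 'Bad'.
  Bad &Bad::operator=(this int &&self, const Bad &) = default;
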
diff --git a/clang/test/SemaCXX/cxx2b-deducing-this.cpp b/clang/test/SemaCXX/cxx2b-deducing-this.cpp index 535381e876da9c..0033541fa322dc 100644 --- a/clang/test/SemaCXX/cxx2b-deducing-this.cpp +++ b/clang/test/SemaCXX/cxx2b-deducing-this.cpp @@ -585,3 +585,44 @@ class Server : public Thing { S name_; }; } + +namespace GH69233 { +struct Base {}; +struct S : Base { + int j; + S& operator=(this Base& self, const S&) = default; + // expected-warning@-1 {{explicitly defaulted copy assignment operator is implicitly deleted}} + // expected-note@-2 {{function is implicitly deleted because its declared type does not match the type of an implicit copy assignment operator}} + // expected-note@-3 {{explicitly defaulted function was implicitly deleted here}} +}; + +struct S2 { + S2& operator=(this int&& self, const S2&); + S2& operator=(this int&& self, S2&&); + operator int(); +}; + +S2& S2::operator=(this int&& self, const S2&) = default; +// expected-error@-1 {{the type of the explicit object parameter of an explicitly-defaulted copy assignment operator should match the type of the class 'S2'}} + +S2& S2::operator=(this int&& self, S2&&) = default; +// expected-error@-1 {{the type of the explicit object parameter of an explicitly-defaulted move assignment operator should match the type of the class 'S2'}} + +struct Move { + Move& operator=(this int&, Move&&) = default; + // expected-warning@-1 {{explicitly defaulted move assignment operator is implicitly deleted}} + // expected-note@-2 {{function is implicitly deleted because its declared type does not match the type of an implicit move assignment operator}} + // expected-note@-3 {{copy assignment operator is implicitly deleted because 'Move' has a user-declared move assignment operator}} +}; + +void test() { + S s; + s = s; // expected-error {{object of type 'S' cannot be assigned because its copy assignment operator is implicitly deleted}} + S2 s2; + s2 = s2; + + Move m; + m = Move{}; // expected-error {{object of type 'Move' cannot be assigned because its copy assignment operator is implicitly deleted}} +} + +} From ee6d62db997bf61ef17a9f888c240f60656dc2db Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 30 Oct 2023 08:07:43 -0700 Subject: [PATCH 026/144] [AMDGPU] Prevent folding of the negative i32 literals as i64 (#70274) We can use sign extended 64-bit literals, but only for signed operands. At the moment we do not know if an operand is signed. Such operand will be encoded as its low 32 bits and then either correctly sign extended or incorrectly zero extended by HW. 
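
The hazard can be reproduced in standalone C++ (an illustration only, not part
of the patch): truncating such a constant to its low 32 bits round-trips under
sign extension but not under zero extension.

  #include <cstdint>
  #include <cstdio>

  int main() {
    // 0xffffffff80000000 is negative, yet its low 32 bits carry all of the
    // information, so a signed-32-bit-literal check accepts it.
    const uint64_t Imm = 0xffffffff80000000ull;
    const uint32_t Low = static_cast<uint32_t>(Imm); // 0x80000000

    // Sign extension recovers the original value...
    const uint64_t Sext = static_cast<uint64_t>(
        static_cast<int64_t>(static_cast<int32_t>(Low)));
    // ...zero extension (what the HW does for unsigned operands) does not.
    const uint64_t Zext = static_cast<uint64_t>(Low);

    std::printf("sext 0x%016llx\n", static_cast<unsigned long long>(Sext));
    std::printf("zext 0x%016llx\n", static_cast<unsigned long long>(Zext));
    return 0;
  }

Here Sext prints 0xffffffff80000000 while Zext prints 0x0000000080000000, which
is why a negative i32 literal cannot be folded into an operand whose signedness
is unknown.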
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        | 15 +-
 .../AMDGPU/fold-short-64-bit-literals.mir     |  6 +-
 .../CodeGen/AMDGPU/folding-of-i32-as-i64.mir  | 128 ++++++++++++++++++
 3 files changed, 145 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index fae43bf30a3f6b..f1e375ee52cb86 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5611,9 +5611,18 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
       OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
       OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2INT32 ||
       OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
-    if (Is64BitOp && !AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
-        !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
-      return false;
+    if (Is64BitOp &&
+        !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm())) {
+      if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp))
+        return false;
+
+      // FIXME: We can use sign extended 64-bit literals, but only for signed
+      // operands. At the moment we do not know if an operand is signed.
+      // Such operand will be encoded as its low 32 bits and then either
+      // correctly sign extended or incorrectly zero extended by HW.
+      if (!Is64BitFPOp && (int32_t)Imm < 0)
+        return false;
+    }
   }
 
   // Handle non-register types that are treated like immediates.
diff --git a/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir b/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir
index 6e975c8a537075..69c6a858162f39 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir
@@ -84,6 +84,9 @@ body: |
     SI_RETURN_TO_EPILOG %2
 ...
 
+# FIXME: This could be folded, but we do not know if the operand of S_AND_B64 is signed or unsigned
+# and if it will be sign or zero extended.
+
 ---
 name: fold_uint_32bit_literal_sgpr
 tracksRegLiveness: true
@@ -92,7 +95,8 @@ body: |
 
     ; GCN-LABEL: name: fold_uint_32bit_literal_sgpr
     ; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
-    ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[DEF]], 4294967295, implicit-def $scc
+    ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 4294967295
+    ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[DEF]], [[S_MOV_B64_]], implicit-def $scc
     ; GCN-NEXT: SI_RETURN_TO_EPILOG [[S_AND_B64_]]
     %0:sreg_64 = IMPLICIT_DEF
    %1:sreg_64 = S_MOV_B64 4294967295
diff --git a/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir b/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir
new file mode 100644
index 00000000000000..30cb88d5789fd3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir
@@ -0,0 +1,128 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fold-operands -o - %s | FileCheck -check-prefix=GCN %s
+
+# The constant is 0xffffffff80000000. It is a 64-bit negative constant, but it passes the test
+# isInt<32>(). Nonetheless it is not a legal literal for a binary or unsigned operand and
+# cannot be used directly in the shift, as HW will zero extend it.
+ +--- +name: imm64_shift_int32_const_0xffffffff80000000 +body: | + bb.0: + ; GCN-LABEL: name: imm64_shift_int32_const_0xffffffff80000000 + ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -2147483648 + ; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744071562067968 + %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc + S_ENDPGM 0, implicit %1 + +... + +--- +name: imm64_shift_int32_const_0xffffffff +body: | + bb.0: + ; GCN-LABEL: name: imm64_shift_int32_const_0xffffffff + ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295 + ; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295 + %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc + S_ENDPGM 0, implicit %1 + +... + +--- +name: imm64_shift_int32_const_0x80000000 +body: | + bb.0: + ; GCN-LABEL: name: imm64_shift_int32_const_0x80000000 + ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 2147483648 + ; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 2147483648 + %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc + S_ENDPGM 0, implicit %1 + +... + +--- +name: imm64_shift_int32_const_0x7fffffff +body: | + bb.0: + ; GCN-LABEL: name: imm64_shift_int32_const_0x7fffffff + ; GCN: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 2147483647, 1, implicit-def $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 2147483647 + %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc + S_ENDPGM 0, implicit %1 + +... + +--- +name: imm64_shift_int32_const_0x1ffffffff +body: | + bb.0: + ; GCN-LABEL: name: imm64_shift_int32_const_0x1ffffffff + ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 8589934591 + ; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_MOV_B]], 1, implicit-def $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 8589934591 + %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc + S_ENDPGM 0, implicit %1 + +... + +--- +name: imm64_shift_int32_const_0xffffffffffffffff +body: | + bb.0: + ; GCN-LABEL: name: imm64_shift_int32_const_0xffffffffffffffff + ; GCN: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 -1, 1, implicit-def $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO -1 + %1:sreg_64 = S_LSHL_B64 %0, 1, implicit-def $scc + S_ENDPGM 0, implicit %1 + +... + +--- +name: imm64_ashr_int32_const_0xffffffff +body: | + bb.0: + ; GCN-LABEL: name: imm64_ashr_int32_const_0xffffffff + ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295 + ; GCN-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[S_MOV_B]], 1, implicit-def $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295 + %1:sreg_64 = S_ASHR_I64 %0, 1, implicit-def $scc + S_ENDPGM 0, implicit %1 + +... + +--- +name: imm64_ashr_int32_const_0x7fffffff +body: | + bb.0: + ; GCN-LABEL: name: imm64_ashr_int32_const_0x7fffffff + ; GCN: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 2147483647, 1, implicit-def $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 2147483647 + %1:sreg_64 = S_ASHR_I64 %0, 1, implicit-def $scc + S_ENDPGM 0, implicit %1 + +... 
+ +--- +name: imm64_ashr_int32_const_0xffffffffffffffff +body: | + bb.0: + ; GCN-LABEL: name: imm64_ashr_int32_const_0xffffffffffffffff + ; GCN: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 -1, 1, implicit-def $scc + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] + %0:sreg_64 = S_MOV_B64_IMM_PSEUDO -1 + %1:sreg_64 = S_ASHR_I64 %0, 1, implicit-def $scc + S_ENDPGM 0, implicit %1 + +... From fe8335babba1725e18d6ea94073c3dbb92958bfa Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 30 Oct 2023 08:12:28 -0700 Subject: [PATCH 027/144] [AMDGPU] Select 64-bit imm moves if can be encoded as 32 bit operand (#70395) This allows folding of 64-bit operands if fit into 32-bit. Fixes https://github.com/llvm/llvm-project/issues/67781 --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 13 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 8 + llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 2 +- llvm/lib/Target/AMDGPU/SIInstructions.td | 23 + .../AMDGPU/GlobalISel/combine-short-clamp.ll | 4 +- .../AMDGPU/GlobalISel/extractelement.ll | 124 +- .../AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll | 24 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll | 868 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll | 866 +- .../AMDGPU/GlobalISel/insertelement.i16.ll | 52 +- .../AMDGPU/GlobalISel/insertelement.i8.ll | 16 + .../AMDGPU/GlobalISel/insertelement.ll | 24 +- ...inst-select-amdgpu-atomic-cmpxchg-flat.mir | 117 +- ...st-select-amdgpu-atomic-cmpxchg-global.mir | 136 +- .../inst-select-atomicrmw-add-flat.mir | 330 +- .../inst-select-atomicrmw-add-global.mir | 320 +- .../GlobalISel/inst-select-constant.mir | 360 +- .../GlobalISel/inst-select-fconstant.mir | 42 +- .../AMDGPU/GlobalISel/inst-select-fmul.mir | 93 +- .../GlobalISel/inst-select-fract.f64.mir | 13 +- .../inst-select-load-atomic-flat.mir | 102 +- .../inst-select-load-atomic-global.mir | 156 +- .../GlobalISel/inst-select-load-constant.mir | 54 +- .../GlobalISel/inst-select-load-flat.mir | 772 +- .../inst-select-load-global-saddr.mir | 72 +- .../GlobalISel/inst-select-load-global.mir | 708 +- .../GlobalISel/inst-select-load-smrd.mir | 2 +- .../AMDGPU/GlobalISel/inst-select-ptrmask.mir | 36 +- .../GlobalISel/inst-select-store-flat.mir | 120 +- .../GlobalISel/inst-select-store-global.mir | 24 +- .../GlobalISel/llvm.amdgcn.div.scale.ll | 26 +- .../llvm.amdgcn.global.atomic.csub.ll | 30 +- .../GlobalISel/llvm.amdgcn.mfma.gfx90a.ll | 36 +- .../CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll | 7 +- .../CodeGen/AMDGPU/GlobalISel/llvm.memset.ll | 7 +- .../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll | 305 +- .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll | 280 +- .../CodeGen/AMDGPU/GlobalISel/udiv.i64.ll | 191 +- .../CodeGen/AMDGPU/GlobalISel/urem.i64.ll | 175 +- .../AMDGPU/agpr-copy-no-free-registers.ll | 18 +- .../AMDGPU/amdgpu-codegenprepare-idiv.ll | 176 +- .../atomic_optimizations_local_pointer.ll | 16 +- .../CodeGen/AMDGPU/combine_andor_with_cmps.ll | 3 +- llvm/test/CodeGen/AMDGPU/commute-compares.ll | 4 +- llvm/test/CodeGen/AMDGPU/constrained-shift.ll | 3 +- llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll | 824 +- .../AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll | 115 +- .../CodeGen/AMDGPU/fp64-atomics-gfx90a.ll | 32 +- llvm/test/CodeGen/AMDGPU/fract-match.ll | 15 +- llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll | 284 +- llvm/test/CodeGen/AMDGPU/global_atomics.ll | 42 +- .../AMDGPU/global_atomics_scan_fadd.ll | 324 +- .../AMDGPU/global_atomics_scan_fsub.ll | 324 +- llvm/test/CodeGen/AMDGPU/inline-asm.ll | 5 +- .../CodeGen/AMDGPU/insert-delay-alu-bug.ll | 9 +- 
.../CodeGen/AMDGPU/insert_vector_dynelt.ll | 37 +- llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll | 42 +- .../CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 52 +- .../ipra-return-address-save-restore.ll | 2 +- .../CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll | 322 +- .../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll | 236 +- .../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll | 672 +- .../CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll | 120 +- .../CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll | 450 +- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll | 4 +- llvm/test/CodeGen/AMDGPU/llvm.frexp.ll | 72 +- llvm/test/CodeGen/AMDGPU/load-constant-i1.ll | 77 +- llvm/test/CodeGen/AMDGPU/load-global-i16.ll | 849 +- llvm/test/CodeGen/AMDGPU/offset-split-flat.ll | 861 +- .../CodeGen/AMDGPU/offset-split-global.ll | 402 +- .../AMDGPU/reassoc-mul-add-1-to-mad.ll | 68 +- llvm/test/CodeGen/AMDGPU/rsq.f64.ll | 649 +- llvm/test/CodeGen/AMDGPU/salu-to-valu.ll | 8 +- llvm/test/CodeGen/AMDGPU/sdiv64.ll | 15 +- llvm/test/CodeGen/AMDGPU/shl.ll | 59 +- .../CodeGen/AMDGPU/spill-scavenge-offset.ll | 7359 ++++++++--------- .../AMDGPU/splitkit-getsubrangeformask.ll | 326 +- llvm/test/CodeGen/AMDGPU/srem64.ll | 15 +- llvm/test/CodeGen/AMDGPU/swdev380865.ll | 103 +- .../AMDGPU/tuple-allocation-failure.ll | 158 +- llvm/test/CodeGen/AMDGPU/udiv64.ll | 15 +- .../AMDGPU/unstructured-cfg-def-use-issue.ll | 10 +- llvm/test/CodeGen/AMDGPU/urem64.ll | 15 +- .../CodeGen/AMDGPU/use-sgpr-multiple-times.ll | 7 +- llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll | 6 +- 85 files changed, 10087 insertions(+), 11656 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index b5ceaaa14b4fd5..804ffb90b53024 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -595,11 +595,15 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { break; uint64_t Imm; - if (ConstantFPSDNode *FP = dyn_cast(N)) + if (ConstantFPSDNode *FP = dyn_cast(N)) { Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); - else { + if (AMDGPU::isValid32BitLiteral(Imm, true)) + break; + } else { ConstantSDNode *C = cast(N); Imm = C->getZExtValue(); + if (AMDGPU::isValid32BitLiteral(Imm, false)) + break; } SDLoc DL(N); @@ -3014,7 +3018,7 @@ bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const { if (!RC || SIRI->isSGPRClass(RC)) return false; - if (RC != &AMDGPU::VS_32RegClass) { + if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) { AllUsesAcceptSReg = false; SDNode * User = *U; if (User->isMachineOpcode()) { @@ -3026,7 +3030,8 @@ bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const { if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) { unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs(); const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo); - if (CommutedRC == &AMDGPU::VS_32RegClass) + if (CommutedRC == &AMDGPU::VS_32RegClass || + CommutedRC == &AMDGPU::VS_64RegClass) AllUsesAcceptSReg = true; } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 31d72fb8cadd8a..2cf60f338105b1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2551,11 +2551,13 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { MachineOperand &ImmOp = I.getOperand(1); Register DstReg = I.getOperand(0).getReg(); unsigned Size = MRI->getType(DstReg).getSizeInBits(); + bool IsFP = false; // The 
AMDGPU backend only supports Imm operands and not CImm or FPImm.
   if (ImmOp.isFPImm()) {
     const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
     ImmOp.ChangeToImmediate(Imm.getZExtValue());
+    IsFP = true;
   } else if (ImmOp.isCImm()) {
     ImmOp.ChangeToImmediate(ImmOp.getCImm()->getSExtValue());
   } else {
@@ -2568,6 +2570,12 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
   unsigned Opcode;
   if (DstRB->getID() == AMDGPU::VCCRegBankID) {
     Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+  } else if (Size == 64 &&
+             AMDGPU::isValid32BitLiteral(I.getOperand(1).getImm(), IsFP)) {
+    Opcode = IsSgpr ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::V_MOV_B64_PSEUDO;
+    I.setDesc(TII.get(Opcode));
+    I.addImplicitDefUseOperands(*MF);
+    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
   } else {
     Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
 
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index b32ed9fef5dd34..b7ac90e33f65e0 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -367,7 +367,7 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
     SMovOp = AMDGPU::S_MOV_B32;
     break;
   case AMDGPU::V_MOV_B64_PSEUDO:
-    SMovOp = AMDGPU::S_MOV_B64;
+    SMovOp = AMDGPU::S_MOV_B64_IMM_PSEUDO;
   }
   Imm = ImmOp->getImm();
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index e56269438472ee..ba3ed939561d4b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1966,6 +1966,29 @@ def : GCNPat <
   (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
 >;
 
+// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
+// immediate and will be expanded as needed, but we will only use these patterns
+// for values which can be encoded. 
+def : GCNPat < + (VGPRImm<(i64 imm)>:$imm), + (V_MOV_B64_PSEUDO imm:$imm) +>; + +def : GCNPat < + (VGPRImm<(f64 fpimm)>:$imm), + (V_MOV_B64_PSEUDO (f64 (bitcast_fpimm_to_i64 $imm))) +>; + +def : GCNPat < + (i64 imm:$imm), + (S_MOV_B64_IMM_PSEUDO imm:$imm) +>; + +def : GCNPat < + (f64 fpimm:$imm), + (S_MOV_B64_IMM_PSEUDO (i64 (bitcast_fpimm_to_i64 fpimm:$imm))) +>; + def : GCNPat < (f32 fpimm:$imm), (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm))) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll index ed525fb83c6de8..621394fd290b0c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll @@ -41,11 +41,12 @@ entry: } ; GCN-LABEL: {{^}}v_clamp_i64_i16_invalid_lower +; GFX6789: v_mov_b32_e32 v{{[0-9]+}}, 0x8001 ; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8001 ; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc ; GFX6789: v_cndmask_b32_e32 [[C:v[0-9]+]], 0, [[C]], vcc -; GFX10: v_cndmask_b32_e32 [[A:v[0-9]+]], 0x8001, [[A]], vcc_lo +; GFX10: v_{{(dual_)?}}cndmask_b32{{(_e32)?}} [[A:v[0-9]+]], 0x8001, [[A]] ; GFX10: v_cndmask_b32_e32 [[B:v[0-9]+]], 0, [[B]], vcc_lo define i16 @v_clamp_i64_i16_invalid_lower(i64 %in) #0 { entry: @@ -56,6 +57,7 @@ entry: } ; GCN-LABEL: {{^}}v_clamp_i64_i16_invalid_lower_and_higher +; GFX6789: v_mov_b32_e32 v{{[0-9]+}}, 0x8000 ; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8000 ; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc ; GFX10: v_cndmask_b32_e32 [[A:v[0-9]+]], 0x8000, [[A]], vcc_lo diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll index 701a733d9e8e95..8bf34caea40513 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -2090,69 +2090,69 @@ define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) { ; GCN-LABEL: dyn_extract_v16f64_s_s: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_mov_b32 s66, 0 +; GCN-NEXT: s_mov_b32 s64, 0 +; GCN-NEXT: s_mov_b32 s62, 0 +; GCN-NEXT: s_mov_b32 s60, 0 +; GCN-NEXT: s_mov_b32 s58, 0 +; GCN-NEXT: s_mov_b32 s56, 0 +; GCN-NEXT: s_mov_b32 s54, 0 +; GCN-NEXT: s_mov_b32 s52, 0 +; GCN-NEXT: s_mov_b32 s50, 0 +; GCN-NEXT: s_mov_b32 s48, 0 +; GCN-NEXT: s_mov_b32 s46, 0 +; GCN-NEXT: s_mov_b32 s44, 0 +; GCN-NEXT: s_mov_b32 s40, 0 ; GCN-NEXT: s_mov_b64 s[36:37], 1.0 ; GCN-NEXT: s_mov_b32 m0, s2 ; GCN-NEXT: s_mov_b32 s67, 0x40300000 ; GCN-NEXT: s_mov_b32 s65, 0x402e0000 -; GCN-NEXT: s_mov_b32 s64, s66 ; GCN-NEXT: s_mov_b32 s63, 0x402c0000 -; GCN-NEXT: s_mov_b32 s62, s66 ; GCN-NEXT: s_mov_b32 s61, 0x402a0000 -; GCN-NEXT: s_mov_b32 s60, s66 ; GCN-NEXT: s_mov_b32 s59, 0x40280000 -; GCN-NEXT: s_mov_b32 s58, s66 ; GCN-NEXT: s_mov_b32 s57, 0x40260000 -; GCN-NEXT: s_mov_b32 s56, s66 ; GCN-NEXT: s_mov_b32 s55, 0x40240000 -; GCN-NEXT: s_mov_b32 s54, s66 ; GCN-NEXT: s_mov_b32 s53, 0x40220000 -; GCN-NEXT: s_mov_b32 s52, s66 ; GCN-NEXT: s_mov_b32 s51, 0x40200000 -; GCN-NEXT: s_mov_b32 s50, s66 ; GCN-NEXT: s_mov_b32 s49, 0x401c0000 -; GCN-NEXT: s_mov_b32 s48, s66 ; GCN-NEXT: s_mov_b32 s47, 0x40180000 -; GCN-NEXT: s_mov_b32 s46, s66 ; GCN-NEXT: s_mov_b32 s45, 0x40140000 -; GCN-NEXT: s_mov_b32 s44, s66 ; GCN-NEXT: s_mov_b64 s[42:43], 4.0 ; GCN-NEXT: s_mov_b32 s41, 0x40080000 -; GCN-NEXT: s_mov_b32 s40, s66 ; GCN-NEXT: s_mov_b64 s[38:39], 2.0 ; GCN-NEXT: s_movrels_b64 s[0:1], s[36:37] ; GCN-NEXT: ; return to shader part epilog ; ; 
GFX10PLUS-LABEL: dyn_extract_v16f64_s_s: ; GFX10PLUS: ; %bb.0: ; %entry -; GFX10PLUS-NEXT: s_mov_b32 s66, 0 ; GFX10PLUS-NEXT: s_mov_b64 s[36:37], 1.0 ; GFX10PLUS-NEXT: s_mov_b32 m0, s2 +; GFX10PLUS-NEXT: s_mov_b32 s66, 0 +; GFX10PLUS-NEXT: s_mov_b32 s64, 0 +; GFX10PLUS-NEXT: s_mov_b32 s62, 0 +; GFX10PLUS-NEXT: s_mov_b32 s60, 0 +; GFX10PLUS-NEXT: s_mov_b32 s58, 0 +; GFX10PLUS-NEXT: s_mov_b32 s56, 0 +; GFX10PLUS-NEXT: s_mov_b32 s54, 0 +; GFX10PLUS-NEXT: s_mov_b32 s52, 0 +; GFX10PLUS-NEXT: s_mov_b32 s50, 0 +; GFX10PLUS-NEXT: s_mov_b32 s48, 0 +; GFX10PLUS-NEXT: s_mov_b32 s46, 0 +; GFX10PLUS-NEXT: s_mov_b32 s44, 0 +; GFX10PLUS-NEXT: s_mov_b32 s40, 0 ; GFX10PLUS-NEXT: s_mov_b32 s67, 0x40300000 ; GFX10PLUS-NEXT: s_mov_b32 s65, 0x402e0000 -; GFX10PLUS-NEXT: s_mov_b32 s64, s66 ; GFX10PLUS-NEXT: s_mov_b32 s63, 0x402c0000 -; GFX10PLUS-NEXT: s_mov_b32 s62, s66 ; GFX10PLUS-NEXT: s_mov_b32 s61, 0x402a0000 -; GFX10PLUS-NEXT: s_mov_b32 s60, s66 ; GFX10PLUS-NEXT: s_mov_b32 s59, 0x40280000 -; GFX10PLUS-NEXT: s_mov_b32 s58, s66 ; GFX10PLUS-NEXT: s_mov_b32 s57, 0x40260000 -; GFX10PLUS-NEXT: s_mov_b32 s56, s66 ; GFX10PLUS-NEXT: s_mov_b32 s55, 0x40240000 -; GFX10PLUS-NEXT: s_mov_b32 s54, s66 ; GFX10PLUS-NEXT: s_mov_b32 s53, 0x40220000 -; GFX10PLUS-NEXT: s_mov_b32 s52, s66 ; GFX10PLUS-NEXT: s_mov_b32 s51, 0x40200000 -; GFX10PLUS-NEXT: s_mov_b32 s50, s66 ; GFX10PLUS-NEXT: s_mov_b32 s49, 0x401c0000 -; GFX10PLUS-NEXT: s_mov_b32 s48, s66 ; GFX10PLUS-NEXT: s_mov_b32 s47, 0x40180000 -; GFX10PLUS-NEXT: s_mov_b32 s46, s66 ; GFX10PLUS-NEXT: s_mov_b32 s45, 0x40140000 -; GFX10PLUS-NEXT: s_mov_b32 s44, s66 ; GFX10PLUS-NEXT: s_mov_b64 s[42:43], 4.0 ; GFX10PLUS-NEXT: s_mov_b32 s41, 0x40080000 -; GFX10PLUS-NEXT: s_mov_b32 s40, s66 ; GFX10PLUS-NEXT: s_mov_b64 s[38:39], 2.0 ; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[36:37] ; GFX10PLUS-NEXT: ; return to shader part epilog @@ -3085,10 +3085,10 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GPRIDX-NEXT: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GPRIDX-NEXT: s_load_dword s8, s[4:5], 0x8 +; GPRIDX-NEXT: s_mov_b32 s4, 0 +; GPRIDX-NEXT: s_mov_b32 s5, 0x40080000 ; GPRIDX-NEXT: s_mov_b32 s2, 0 ; GPRIDX-NEXT: s_mov_b32 s3, 0x40140000 -; GPRIDX-NEXT: s_mov_b32 s5, 0x40080000 -; GPRIDX-NEXT: s_mov_b32 s4, s2 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1 ; GPRIDX-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 @@ -3176,10 +3176,10 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; MOVREL-NEXT: ; %bb.0: ; %entry ; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; MOVREL-NEXT: s_load_dword s8, s[4:5], 0x8 +; MOVREL-NEXT: s_mov_b32 s4, 0 +; MOVREL-NEXT: s_mov_b32 s5, 0x40080000 ; MOVREL-NEXT: s_mov_b32 s2, 0 ; MOVREL-NEXT: s_mov_b32 s3, 0x40140000 -; MOVREL-NEXT: s_mov_b32 s5, 0x40080000 -; MOVREL-NEXT: s_mov_b32 s4, s2 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0) ; MOVREL-NEXT: s_cmp_eq_u32 s8, 1 ; MOVREL-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 @@ -3207,7 +3207,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GFX10-NEXT: kernel_code_entry_byte_offset = 256 ; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 ; GFX10-NEXT: granulated_workitem_vgpr_count = 0 -; GFX10-NEXT: granulated_wavefront_sgpr_count = 1 +; GFX10-NEXT: granulated_wavefront_sgpr_count = 0 ; GFX10-NEXT: priority = 0 ; GFX10-NEXT: float_mode = 240 ; GFX10-NEXT: priv = 0 @@ -3250,7 +3250,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GFX10-NEXT: 
gds_segment_byte_size = 0 ; GFX10-NEXT: kernarg_segment_byte_size = 12 ; GFX10-NEXT: workgroup_fbarrier_count = 0 -; GFX10-NEXT: wavefront_sgpr_count = 9 +; GFX10-NEXT: wavefront_sgpr_count = 7 ; GFX10-NEXT: workitem_vgpr_count = 3 ; GFX10-NEXT: reserved_vgpr_first = 0 ; GFX10-NEXT: reserved_vgpr_count = 0 @@ -3267,22 +3267,22 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GFX10-NEXT: .end_amd_kernel_code_t ; GFX10-NEXT: ; %bb.0: ; %entry ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s8, s[4:5], 0x8 +; GFX10-NEXT: s_load_dword s6, s[4:5], 0x8 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10-NEXT: s_mov_b32 s2, 0 -; GFX10-NEXT: s_mov_b32 s3, 0x40140000 -; GFX10-NEXT: s_mov_b32 s5, 0x40080000 -; GFX10-NEXT: s_mov_b32 s4, s2 +; GFX10-NEXT: s_mov_b32 s3, 0x40080000 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_cmp_eq_u32 s8, 1 -; GFX10-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 -; GFX10-NEXT: s_cmp_eq_u32 s8, 2 -; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] -; GFX10-NEXT: s_cmp_eq_u32 s8, 3 -; GFX10-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] -; GFX10-NEXT: s_cmp_eq_u32 s8, 4 +; GFX10-NEXT: s_cmp_eq_u32 s6, 1 +; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 +; GFX10-NEXT: s_cmp_eq_u32 s6, 2 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] +; GFX10-NEXT: s_cmp_eq_u32 s6, 3 +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_mov_b32 s5, 0x40140000 +; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] +; GFX10-NEXT: s_cmp_eq_u32 s6, 4 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] @@ -3299,7 +3299,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GFX11-NEXT: kernel_code_entry_byte_offset = 256 ; GFX11-NEXT: kernel_code_prefetch_byte_size = 0 ; GFX11-NEXT: granulated_workitem_vgpr_count = 0 -; GFX11-NEXT: granulated_wavefront_sgpr_count = 1 +; GFX11-NEXT: granulated_wavefront_sgpr_count = 0 ; GFX11-NEXT: priority = 0 ; GFX11-NEXT: float_mode = 240 ; GFX11-NEXT: priv = 0 @@ -3342,7 +3342,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GFX11-NEXT: gds_segment_byte_size = 0 ; GFX11-NEXT: kernarg_segment_byte_size = 12 ; GFX11-NEXT: workgroup_fbarrier_count = 0 -; GFX11-NEXT: wavefront_sgpr_count = 9 +; GFX11-NEXT: wavefront_sgpr_count = 7 ; GFX11-NEXT: workitem_vgpr_count = 3 ; GFX11-NEXT: reserved_vgpr_first = 0 ; GFX11-NEXT: reserved_vgpr_count = 0 @@ -3359,22 +3359,22 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GFX11-NEXT: .end_amd_kernel_code_t ; GFX11-NEXT: ; %bb.0: ; %entry ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: s_load_b32 s8, s[0:1], 0x8 +; GFX11-NEXT: s_load_b32 s6, s[0:1], 0x8 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s2, 0 -; GFX11-NEXT: s_mov_b32 s3, 0x40140000 -; GFX11-NEXT: s_mov_b32 s5, 0x40080000 -; GFX11-NEXT: s_mov_b32 s4, s2 +; GFX11-NEXT: s_mov_b32 s3, 0x40080000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_cmp_eq_u32 s8, 1 -; GFX11-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 -; GFX11-NEXT: s_cmp_eq_u32 s8, 2 -; GFX11-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] -; GFX11-NEXT: s_cmp_eq_u32 s8, 3 -; GFX11-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] -; GFX11-NEXT: s_cmp_eq_u32 s8, 4 +; GFX11-NEXT: s_cmp_eq_u32 s6, 1 +; GFX11-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 +; GFX11-NEXT: s_cmp_eq_u32 s6, 2 ; 
GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: s_cmp_eq_u32 s6, 3 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b32 s5, 0x40140000 +; GFX11-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] +; GFX11-NEXT: s_cmp_eq_u32 s6, 4 +; GFX11-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_nop 0 @@ -4784,11 +4784,8 @@ define i32 @v_extract_v64i32_32(ptr addrspace(1) %ptr) { ; MOVREL-LABEL: v_extract_v64i32_32: ; MOVREL: ; %bb.0: ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MOVREL-NEXT: s_mov_b64 s[4:5], 0x80 -; MOVREL-NEXT: v_mov_b32_e32 v2, s4 -; MOVREL-NEXT: v_mov_b32_e32 v3, s5 -; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 0x80, v0 +; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; MOVREL-NEXT: s_waitcnt vmcnt(0) ; MOVREL-NEXT: s_setpc_b64 s[30:31] @@ -4823,11 +4820,8 @@ define i32 @v_extract_v64i32_33(ptr addrspace(1) %ptr) { ; MOVREL-LABEL: v_extract_v64i32_33: ; MOVREL: ; %bb.0: ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MOVREL-NEXT: s_mov_b64 s[4:5], 0x80 -; MOVREL-NEXT: v_mov_b32_e32 v2, s4 -; MOVREL-NEXT: v_mov_b32_e32 v3, s5 -; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 0x80, v0 +; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; MOVREL-NEXT: s_waitcnt vmcnt(0) ; MOVREL-NEXT: v_mov_b32_e32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll index 66bff4a14cac84..c6ea046f95a919 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll @@ -1473,12 +1473,12 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 { ; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat: ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1 +; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_endpgm @@ -1504,12 +1504,12 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 { ; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent: ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] +; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc1 ; GFX940-NEXT: s_endpgm @@ -1549,12 +1549,12 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 { ; GFX940-LABEL: 
flat_atomic_fadd_f64_noret_pat_system: ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1 +; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_endpgm @@ -1748,12 +1748,12 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) { ; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] +; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc1 ; GFX940-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll index 139bb40daa930a..056629ca354518 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -5824,29 +5824,28 @@ define <2 x i64> @v_fshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) { define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) { ; GFX6-LABEL: s_fshl_i128: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_mov_b64 s[10:11], 0x7f -; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] -; GFX6-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] -; GFX6-NEXT: s_sub_i32 s9, s12, 64 -; GFX6-NEXT: s_sub_i32 s10, 64, s12 -; GFX6-NEXT: s_cmp_lt_u32 s12, 64 +; GFX6-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f +; GFX6-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9] +; GFX6-NEXT: s_sub_i32 s9, s10, 64 +; GFX6-NEXT: s_sub_i32 s11, 64, s10 +; GFX6-NEXT: s_cmp_lt_u32 s10, 64 +; GFX6-NEXT: s_cselect_b32 s13, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s10, 0 ; GFX6-NEXT: s_cselect_b32 s18, 1, 0 -; GFX6-NEXT: s_cmp_eq_u32 s12, 0 -; GFX6-NEXT: s_cselect_b32 s19, 1, 0 -; GFX6-NEXT: s_lshl_b64 s[14:15], s[0:1], s12 -; GFX6-NEXT: s_lshr_b64 s[16:17], s[0:1], s10 -; GFX6-NEXT: s_lshl_b64 s[12:13], s[2:3], s12 -; GFX6-NEXT: s_or_b64 s[12:13], s[16:17], s[12:13] +; GFX6-NEXT: s_lshl_b64 s[14:15], s[0:1], s10 +; GFX6-NEXT: s_lshr_b64 s[16:17], s[0:1], s11 +; GFX6-NEXT: s_lshl_b64 s[10:11], s[2:3], s10 +; GFX6-NEXT: s_or_b64 s[10:11], s[16:17], s[10:11] ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 -; GFX6-NEXT: s_cmp_lg_u32 s18, 0 +; GFX6-NEXT: s_cmp_lg_u32 s13, 0 ; GFX6-NEXT: s_cselect_b64 s[14:15], s[14:15], 0 -; GFX6-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1] -; GFX6-NEXT: s_cmp_lg_u32 s19, 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] +; GFX6-NEXT: s_cmp_lg_u32 s18, 0 +; GFX6-NEXT: s_mov_b32 s12, 0 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX6-NEXT: s_lshr_b64 s[0:1], s[4:5], 1 -; GFX6-NEXT: s_lshl_b32 s5, s6, 31 -; GFX6-NEXT: s_mov_b32 s4, s11 -; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] +; GFX6-NEXT: s_lshl_b32 s13, s6, 31 +; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13] ; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], 1 ; GFX6-NEXT: 
s_sub_i32 s12, s8, 64 ; GFX6-NEXT: s_sub_i32 s10, 64, s8 @@ -5871,29 +5870,28 @@ define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; ; GFX8-LABEL: s_fshl_i128: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_mov_b64 s[10:11], 0x7f -; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] -; GFX8-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] -; GFX8-NEXT: s_sub_i32 s9, s12, 64 -; GFX8-NEXT: s_sub_i32 s10, 64, s12 -; GFX8-NEXT: s_cmp_lt_u32 s12, 64 +; GFX8-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f +; GFX8-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9] +; GFX8-NEXT: s_sub_i32 s9, s10, 64 +; GFX8-NEXT: s_sub_i32 s11, 64, s10 +; GFX8-NEXT: s_cmp_lt_u32 s10, 64 +; GFX8-NEXT: s_cselect_b32 s13, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s10, 0 ; GFX8-NEXT: s_cselect_b32 s18, 1, 0 -; GFX8-NEXT: s_cmp_eq_u32 s12, 0 -; GFX8-NEXT: s_cselect_b32 s19, 1, 0 -; GFX8-NEXT: s_lshl_b64 s[14:15], s[0:1], s12 -; GFX8-NEXT: s_lshr_b64 s[16:17], s[0:1], s10 -; GFX8-NEXT: s_lshl_b64 s[12:13], s[2:3], s12 -; GFX8-NEXT: s_or_b64 s[12:13], s[16:17], s[12:13] +; GFX8-NEXT: s_lshl_b64 s[14:15], s[0:1], s10 +; GFX8-NEXT: s_lshr_b64 s[16:17], s[0:1], s11 +; GFX8-NEXT: s_lshl_b64 s[10:11], s[2:3], s10 +; GFX8-NEXT: s_or_b64 s[10:11], s[16:17], s[10:11] ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 -; GFX8-NEXT: s_cmp_lg_u32 s18, 0 +; GFX8-NEXT: s_cmp_lg_u32 s13, 0 ; GFX8-NEXT: s_cselect_b64 s[14:15], s[14:15], 0 -; GFX8-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1] -; GFX8-NEXT: s_cmp_lg_u32 s19, 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] +; GFX8-NEXT: s_cmp_lg_u32 s18, 0 +; GFX8-NEXT: s_mov_b32 s12, 0 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX8-NEXT: s_lshr_b64 s[0:1], s[4:5], 1 -; GFX8-NEXT: s_lshl_b32 s5, s6, 31 -; GFX8-NEXT: s_mov_b32 s4, s11 -; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] +; GFX8-NEXT: s_lshl_b32 s13, s6, 31 +; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13] ; GFX8-NEXT: s_lshr_b64 s[4:5], s[6:7], 1 ; GFX8-NEXT: s_sub_i32 s12, s8, 64 ; GFX8-NEXT: s_sub_i32 s10, 64, s8 @@ -5918,29 +5916,28 @@ define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; ; GFX9-LABEL: s_fshl_i128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b64 s[10:11], 0x7f -; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] -; GFX9-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] -; GFX9-NEXT: s_sub_i32 s9, s12, 64 -; GFX9-NEXT: s_sub_i32 s10, 64, s12 -; GFX9-NEXT: s_cmp_lt_u32 s12, 64 +; GFX9-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f +; GFX9-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9] +; GFX9-NEXT: s_sub_i32 s9, s10, 64 +; GFX9-NEXT: s_sub_i32 s11, 64, s10 +; GFX9-NEXT: s_cmp_lt_u32 s10, 64 +; GFX9-NEXT: s_cselect_b32 s13, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s10, 0 ; GFX9-NEXT: s_cselect_b32 s18, 1, 0 -; GFX9-NEXT: s_cmp_eq_u32 s12, 0 -; GFX9-NEXT: s_cselect_b32 s19, 1, 0 -; GFX9-NEXT: s_lshl_b64 s[14:15], s[0:1], s12 -; GFX9-NEXT: s_lshr_b64 s[16:17], s[0:1], s10 -; GFX9-NEXT: s_lshl_b64 s[12:13], s[2:3], s12 -; GFX9-NEXT: s_or_b64 s[12:13], s[16:17], s[12:13] +; GFX9-NEXT: s_lshl_b64 s[14:15], s[0:1], s10 +; GFX9-NEXT: s_lshr_b64 s[16:17], s[0:1], s11 +; GFX9-NEXT: s_lshl_b64 s[10:11], s[2:3], s10 +; GFX9-NEXT: s_or_b64 s[10:11], s[16:17], s[10:11] ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 -; GFX9-NEXT: s_cmp_lg_u32 s18, 0 +; GFX9-NEXT: s_cmp_lg_u32 s13, 0 ; GFX9-NEXT: s_cselect_b64 s[14:15], s[14:15], 0 -; GFX9-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1] -; GFX9-NEXT: s_cmp_lg_u32 s19, 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] +; GFX9-NEXT: s_cmp_lg_u32 s18, 0 +; GFX9-NEXT: s_mov_b32 s12, 0 ; GFX9-NEXT: s_cselect_b64 
s[2:3], s[2:3], s[0:1] ; GFX9-NEXT: s_lshr_b64 s[0:1], s[4:5], 1 -; GFX9-NEXT: s_lshl_b32 s5, s6, 31 -; GFX9-NEXT: s_mov_b32 s4, s11 -; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] +; GFX9-NEXT: s_lshl_b32 s13, s6, 31 +; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13] ; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], 1 ; GFX9-NEXT: s_sub_i32 s12, s8, 64 ; GFX9-NEXT: s_sub_i32 s10, 64, s8 @@ -5965,40 +5962,39 @@ define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; ; GFX10-LABEL: s_fshl_i128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mov_b64 s[10:11], 0x7f -; GFX10-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] -; GFX10-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] -; GFX10-NEXT: s_sub_i32 s9, s12, 64 -; GFX10-NEXT: s_sub_i32 s10, 64, s12 -; GFX10-NEXT: s_cmp_lt_u32 s12, 64 +; GFX10-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f +; GFX10-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9] +; GFX10-NEXT: s_sub_i32 s9, s10, 64 +; GFX10-NEXT: s_sub_i32 s11, 64, s10 +; GFX10-NEXT: s_cmp_lt_u32 s10, 64 +; GFX10-NEXT: s_mov_b32 s12, 0 +; GFX10-NEXT: s_cselect_b32 s13, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s10, 0 ; GFX10-NEXT: s_cselect_b32 s18, 1, 0 -; GFX10-NEXT: s_cmp_eq_u32 s12, 0 -; GFX10-NEXT: s_cselect_b32 s19, 1, 0 -; GFX10-NEXT: s_lshr_b64 s[14:15], s[0:1], s10 -; GFX10-NEXT: s_lshl_b64 s[16:17], s[2:3], s12 -; GFX10-NEXT: s_lshl_b64 s[12:13], s[0:1], s12 +; GFX10-NEXT: s_lshr_b64 s[14:15], s[0:1], s11 +; GFX10-NEXT: s_lshl_b64 s[16:17], s[2:3], s10 +; GFX10-NEXT: s_lshl_b64 s[10:11], s[0:1], s10 ; GFX10-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 -; GFX10-NEXT: s_cmp_lg_u32 s18, 0 -; GFX10-NEXT: s_cselect_b64 s[12:13], s[12:13], 0 +; GFX10-NEXT: s_cmp_lg_u32 s13, 0 +; GFX10-NEXT: s_cselect_b64 s[10:11], s[10:11], 0 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1] -; GFX10-NEXT: s_cmp_lg_u32 s19, 0 +; GFX10-NEXT: s_cmp_lg_u32 s18, 0 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], 1 -; GFX10-NEXT: s_lshl_b32 s5, s6, 31 -; GFX10-NEXT: s_mov_b32 s4, s11 -; GFX10-NEXT: s_sub_i32 s14, s8, 64 -; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] +; GFX10-NEXT: s_lshl_b32 s13, s6, 31 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[6:7], 1 +; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13] +; GFX10-NEXT: s_sub_i32 s14, s8, 64 ; GFX10-NEXT: s_sub_i32 s9, 64, s8 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64 ; GFX10-NEXT: s_cselect_b32 s15, 1, 0 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0 ; GFX10-NEXT: s_cselect_b32 s16, 1, 0 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 -; GFX10-NEXT: s_lshl_b64 s[10:11], s[4:5], s9 +; GFX10-NEXT: s_lshl_b64 s[12:13], s[4:5], s9 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[4:5], s8 -; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] +; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[12:13] ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s14 ; GFX10-NEXT: s_cmp_lg_u32 s15, 0 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] @@ -6006,47 +6002,45 @@ define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] ; GFX10-NEXT: s_cmp_lg_u32 s15, 0 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[8:9], 0 -; GFX10-NEXT: s_or_b64 s[0:1], s[12:13], s[0:1] +; GFX10-NEXT: s_or_b64 s[0:1], s[10:11], s[0:1] ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: s_fshl_i128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_mov_b64 s[10:11], 0x7f -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] -; GFX11-NEXT: s_and_not1_b64 s[8:9], s[10:11], 
s[8:9] -; GFX11-NEXT: s_sub_i32 s9, s12, 64 -; GFX11-NEXT: s_sub_i32 s10, 64, s12 -; GFX11-NEXT: s_cmp_lt_u32 s12, 64 +; GFX11-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f +; GFX11-NEXT: s_and_not1_b64 s[8:9], 0x7f, s[8:9] +; GFX11-NEXT: s_sub_i32 s9, s10, 64 +; GFX11-NEXT: s_sub_i32 s11, 64, s10 +; GFX11-NEXT: s_cmp_lt_u32 s10, 64 +; GFX11-NEXT: s_mov_b32 s12, 0 +; GFX11-NEXT: s_cselect_b32 s13, 1, 0 +; GFX11-NEXT: s_cmp_eq_u32 s10, 0 ; GFX11-NEXT: s_cselect_b32 s18, 1, 0 -; GFX11-NEXT: s_cmp_eq_u32 s12, 0 -; GFX11-NEXT: s_cselect_b32 s19, 1, 0 -; GFX11-NEXT: s_lshr_b64 s[14:15], s[0:1], s10 -; GFX11-NEXT: s_lshl_b64 s[16:17], s[2:3], s12 -; GFX11-NEXT: s_lshl_b64 s[12:13], s[0:1], s12 +; GFX11-NEXT: s_lshr_b64 s[14:15], s[0:1], s11 +; GFX11-NEXT: s_lshl_b64 s[16:17], s[2:3], s10 +; GFX11-NEXT: s_lshl_b64 s[10:11], s[0:1], s10 ; GFX11-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 -; GFX11-NEXT: s_cmp_lg_u32 s18, 0 -; GFX11-NEXT: s_cselect_b64 s[12:13], s[12:13], 0 +; GFX11-NEXT: s_cmp_lg_u32 s13, 0 +; GFX11-NEXT: s_cselect_b64 s[10:11], s[10:11], 0 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1] -; GFX11-NEXT: s_cmp_lg_u32 s19, 0 +; GFX11-NEXT: s_cmp_lg_u32 s18, 0 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX11-NEXT: s_lshr_b64 s[0:1], s[4:5], 1 -; GFX11-NEXT: s_lshl_b32 s5, s6, 31 -; GFX11-NEXT: s_mov_b32 s4, s11 -; GFX11-NEXT: s_sub_i32 s14, s8, 64 -; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] +; GFX11-NEXT: s_lshl_b32 s13, s6, 31 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[6:7], 1 +; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13] +; GFX11-NEXT: s_sub_i32 s14, s8, 64 ; GFX11-NEXT: s_sub_i32 s9, 64, s8 ; GFX11-NEXT: s_cmp_lt_u32 s8, 64 ; GFX11-NEXT: s_cselect_b32 s15, 1, 0 ; GFX11-NEXT: s_cmp_eq_u32 s8, 0 ; GFX11-NEXT: s_cselect_b32 s16, 1, 0 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 -; GFX11-NEXT: s_lshl_b64 s[10:11], s[4:5], s9 +; GFX11-NEXT: s_lshl_b64 s[12:13], s[4:5], s9 ; GFX11-NEXT: s_lshr_b64 s[8:9], s[4:5], s8 -; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] +; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[12:13] ; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], s14 ; GFX11-NEXT: s_cmp_lg_u32 s15, 0 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] @@ -6054,7 +6048,7 @@ define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] ; GFX11-NEXT: s_cmp_lg_u32 s15, 0 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[8:9], 0 -; GFX11-NEXT: s_or_b64 s[0:1], s[12:13], s[0:1] +; GFX11-NEXT: s_or_b64 s[0:1], s[10:11], s[0:1] ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] ; GFX11-NEXT: ; return to shader part epilog %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt) @@ -6575,23 +6569,22 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) { ; GFX6-LABEL: v_fshl_i128_svs: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_mov_b64 s[6:7], 0x7f -; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] -; GFX6-NEXT: s_sub_i32 s5, s8, 64 -; GFX6-NEXT: s_sub_i32 s9, 64, s8 -; GFX6-NEXT: s_cmp_lt_u32 s8, 64 +; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX6-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] +; GFX6-NEXT: s_sub_i32 s5, s6, 64 +; GFX6-NEXT: s_sub_i32 s7, 64, s6 +; GFX6-NEXT: s_cmp_lt_u32 s6, 64 ; GFX6-NEXT: s_cselect_b32 s12, 1, 0 -; GFX6-NEXT: s_cmp_eq_u32 s8, 0 +; GFX6-NEXT: s_cmp_eq_u32 s6, 0 ; GFX6-NEXT: s_cselect_b32 s13, 1, 0 -; GFX6-NEXT: 
s_lshl_b64 s[6:7], s[0:1], s8 -; GFX6-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 -; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 -; GFX6-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] +; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], s6 +; GFX6-NEXT: s_lshr_b64 s[10:11], s[0:1], s7 +; GFX6-NEXT: s_lshl_b64 s[6:7], s[2:3], s6 +; GFX6-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7] ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 ; GFX6-NEXT: s_cmp_lg_u32 s12, 0 -; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 -; GFX6-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] +; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] ; GFX6-NEXT: s_cmp_lg_u32 s13, 0 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 1 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] @@ -6605,14 +6598,14 @@ define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; GFX6-NEXT: s_cmp_eq_u32 s4, 0 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s4 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s1 -; GFX6-NEXT: s_cselect_b32 s8, 1, 0 +; GFX6-NEXT: s_cselect_b32 s6, 1, 0 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s4 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s0 ; GFX6-NEXT: s_and_b32 s0, 1, s5 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX6-NEXT: s_and_b32 s0, 1, s8 +; GFX6-NEXT: s_and_b32 s0, 1, s6 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc ; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 @@ -6620,31 +6613,30 @@ define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc -; GFX6-NEXT: v_or_b32_e32 v0, s6, v0 -; GFX6-NEXT: v_or_b32_e32 v1, s7, v1 +; GFX6-NEXT: v_or_b32_e32 v0, s8, v0 +; GFX6-NEXT: v_or_b32_e32 v1, s9, v1 ; GFX6-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX6-NEXT: v_or_b32_e32 v3, s3, v3 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: v_fshl_i128_svs: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_mov_b64 s[6:7], 0x7f -; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] -; GFX8-NEXT: s_sub_i32 s5, s8, 64 -; GFX8-NEXT: s_sub_i32 s9, 64, s8 -; GFX8-NEXT: s_cmp_lt_u32 s8, 64 +; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX8-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] +; GFX8-NEXT: s_sub_i32 s5, s6, 64 +; GFX8-NEXT: s_sub_i32 s7, 64, s6 +; GFX8-NEXT: s_cmp_lt_u32 s6, 64 ; GFX8-NEXT: s_cselect_b32 s12, 1, 0 -; GFX8-NEXT: s_cmp_eq_u32 s8, 0 +; GFX8-NEXT: s_cmp_eq_u32 s6, 0 ; GFX8-NEXT: s_cselect_b32 s13, 1, 0 -; GFX8-NEXT: s_lshl_b64 s[6:7], s[0:1], s8 -; GFX8-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 -; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 -; GFX8-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] +; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], s6 +; GFX8-NEXT: s_lshr_b64 s[10:11], s[0:1], s7 +; GFX8-NEXT: s_lshl_b64 s[6:7], s[2:3], s6 +; GFX8-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7] ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 ; GFX8-NEXT: s_cmp_lg_u32 s12, 0 -; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 -; GFX8-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] +; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] ; GFX8-NEXT: s_cmp_lg_u32 s13, 0 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] @@ -6658,14 +6650,14 @@ define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; GFX8-NEXT: s_cmp_eq_u32 s4, 0 ; GFX8-NEXT: v_lshrrev_b64 
v[4:5], s4, v[0:1] ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3] -; GFX8-NEXT: s_cselect_b32 s8, 1, 0 +; GFX8-NEXT: s_cselect_b32 s6, 1, 0 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], s4, v[2:3] ; GFX8-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3] ; GFX8-NEXT: s_and_b32 s0, 1, s5 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX8-NEXT: s_and_b32 s0, 1, s8 +; GFX8-NEXT: s_and_b32 s0, 1, s6 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 @@ -6673,32 +6665,31 @@ define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc -; GFX8-NEXT: v_or_b32_e32 v0, s6, v0 -; GFX8-NEXT: v_or_b32_e32 v1, s7, v1 +; GFX8-NEXT: v_or_b32_e32 v0, s8, v0 +; GFX8-NEXT: v_or_b32_e32 v1, s9, v1 ; GFX8-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX8-NEXT: v_or_b32_e32 v3, s3, v3 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: v_fshl_i128_svs: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f -; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] -; GFX9-NEXT: s_sub_i32 s5, s8, 64 -; GFX9-NEXT: s_sub_i32 s9, 64, s8 -; GFX9-NEXT: s_cmp_lt_u32 s8, 64 +; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX9-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] +; GFX9-NEXT: s_sub_i32 s5, s6, 64 +; GFX9-NEXT: s_sub_i32 s7, 64, s6 +; GFX9-NEXT: s_cmp_lt_u32 s6, 64 ; GFX9-NEXT: s_cselect_b32 s12, 1, 0 -; GFX9-NEXT: s_cmp_eq_u32 s8, 0 +; GFX9-NEXT: s_cmp_eq_u32 s6, 0 ; GFX9-NEXT: s_cselect_b32 s13, 1, 0 -; GFX9-NEXT: s_lshl_b64 s[6:7], s[0:1], s8 -; GFX9-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 -; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 -; GFX9-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] +; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], s6 +; GFX9-NEXT: s_lshr_b64 s[10:11], s[0:1], s7 +; GFX9-NEXT: s_lshl_b64 s[6:7], s[2:3], s6 +; GFX9-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7] ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 ; GFX9-NEXT: s_cmp_lg_u32 s12, 0 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] -; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 -; GFX9-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] +; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] ; GFX9-NEXT: s_cmp_lg_u32 s13, 0 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 31, v1 @@ -6710,14 +6701,14 @@ define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3] -; GFX9-NEXT: s_cselect_b32 s8, 1, 0 +; GFX9-NEXT: s_cselect_b32 s6, 1, 0 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], s4, v[2:3] ; GFX9-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3] ; GFX9-NEXT: s_and_b32 s0, 1, s5 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX9-NEXT: s_and_b32 s0, 1, s8 +; GFX9-NEXT: s_and_b32 s0, 1, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 @@ -6725,36 +6716,35 @@ define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc -; GFX9-NEXT: v_or_b32_e32 
v0, s6, v0 -; GFX9-NEXT: v_or_b32_e32 v1, s7, v1 +; GFX9-NEXT: v_or_b32_e32 v0, s8, v0 +; GFX9-NEXT: v_or_b32_e32 v1, s9, v1 ; GFX9-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX9-NEXT: v_or_b32_e32 v3, s3, v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: v_fshl_i128_svs: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f +; GFX10-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX10-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] +; GFX10-NEXT: s_sub_i32 s5, s6, 64 +; GFX10-NEXT: s_sub_i32 s7, 64, s6 +; GFX10-NEXT: s_cmp_lt_u32 s6, 64 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] -; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] -; GFX10-NEXT: s_sub_i32 s5, s8, 64 -; GFX10-NEXT: s_sub_i32 s6, 64, s8 -; GFX10-NEXT: s_cmp_lt_u32 s8, 64 -; GFX10-NEXT: v_lshl_or_b32 v1, v2, 31, v1 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0 -; GFX10-NEXT: s_cmp_eq_u32 s8, 0 -; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] +; GFX10-NEXT: s_cmp_eq_u32 s6, 0 ; GFX10-NEXT: s_cselect_b32 s13, 1, 0 -; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 -; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], s8 -; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 -; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] +; GFX10-NEXT: s_lshr_b64 s[8:9], s[0:1], s7 +; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], s6 +; GFX10-NEXT: s_lshl_b64 s[6:7], s[0:1], s6 +; GFX10-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0 -; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] -; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 -; GFX10-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] +; GFX10-NEXT: v_lshl_or_b32 v1, v2, 31, v1 +; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] +; GFX10-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] ; GFX10-NEXT: s_cmp_lg_u32 s13, 0 +; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX10-NEXT: s_sub_i32 s0, 64, s4 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3] @@ -6779,34 +6769,33 @@ define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 -; GFX10-NEXT: v_or_b32_e32 v0, s8, v0 -; GFX10-NEXT: v_or_b32_e32 v1, s9, v1 +; GFX10-NEXT: v_or_b32_e32 v0, s6, v0 +; GFX10-NEXT: v_or_b32_e32 v1, s7, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: v_fshl_i128_svs: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_mov_b64 s[6:7], 0x7f +; GFX11-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX11-NEXT: s_and_not1_b64 s[4:5], 0x7f, s[4:5] +; GFX11-NEXT: s_sub_i32 s5, s6, 64 +; GFX11-NEXT: s_sub_i32 s7, 64, s6 +; GFX11-NEXT: s_cmp_lt_u32 s6, 64 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] -; GFX11-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX11-NEXT: s_and_not1_b64 s[4:5], s[6:7], s[4:5] -; GFX11-NEXT: s_sub_i32 s5, s8, 64 -; GFX11-NEXT: s_sub_i32 s6, 64, s8 -; GFX11-NEXT: s_cmp_lt_u32 s8, 64 -; GFX11-NEXT: v_lshl_or_b32 v1, v2, 31, v1 ; GFX11-NEXT: s_cselect_b32 s12, 1, 0 -; GFX11-NEXT: s_cmp_eq_u32 s8, 0 -; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] +; GFX11-NEXT: s_cmp_eq_u32 s6, 0 ; GFX11-NEXT: s_cselect_b32 s13, 1, 0 -; GFX11-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 -; GFX11-NEXT: s_lshl_b64 s[10:11], s[2:3], s8 -; GFX11-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 -; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] +; GFX11-NEXT: s_lshr_b64 s[8:9], s[0:1], s7 +; GFX11-NEXT: s_lshl_b64 s[10:11], s[2:3], s6 +; GFX11-NEXT: s_lshl_b64 s[6:7], s[0:1], 
s6 +; GFX11-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 ; GFX11-NEXT: s_cmp_lg_u32 s12, 0 -; GFX11-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] -; GFX11-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 -; GFX11-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] +; GFX11-NEXT: v_lshl_or_b32 v1, v2, 31, v1 +; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] +; GFX11-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 +; GFX11-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] ; GFX11-NEXT: s_cmp_lg_u32 s13, 0 +; GFX11-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX11-NEXT: s_sub_i32 s0, 64, s4 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -6833,9 +6822,9 @@ define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; GFX11-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_or_b32_e32 v3, s3, v3 -; GFX11-NEXT: v_or_b32_e32 v0, s8, v0 +; GFX11-NEXT: v_or_b32_e32 v0, s6, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-NEXT: v_or_b32_e32 v1, s9, v1 +; GFX11-NEXT: v_or_b32_e32 v1, s7, v1 ; GFX11-NEXT: ; return to shader part epilog %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt) %cast.result = bitcast i128 %result to <4 x float> @@ -6845,23 +6834,22 @@ define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i define amdgpu_ps <4 x float> @v_fshl_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) { ; GFX6-LABEL: v_fshl_i128_vss: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_mov_b64 s[6:7], 0x7f -; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] -; GFX6-NEXT: s_sub_i32 s5, s8, 64 -; GFX6-NEXT: s_sub_i32 s6, 64, s8 -; GFX6-NEXT: s_cmp_lt_u32 s8, 64 +; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX6-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] +; GFX6-NEXT: s_sub_i32 s5, s6, 64 +; GFX6-NEXT: s_sub_i32 s7, 64, s6 +; GFX6-NEXT: s_cmp_lt_u32 s6, 64 ; GFX6-NEXT: s_cselect_b32 s9, 1, 0 -; GFX6-NEXT: s_cmp_eq_u32 s8, 0 +; GFX6-NEXT: s_cmp_eq_u32 s6, 0 +; GFX6-NEXT: s_mov_b32 s8, 0 ; GFX6-NEXT: s_cselect_b32 s10, 1, 0 -; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s6 -; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s8 -; GFX6-NEXT: v_lshl_b64 v[8:9], v[0:1], s8 +; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s7 +; GFX6-NEXT: v_lshl_b64 v[8:9], v[0:1], s6 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s5 ; GFX6-NEXT: s_and_b32 s5, 1, s9 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 ; GFX6-NEXT: s_lshl_b32 s9, s2, 31 -; GFX6-NEXT: s_mov_b32 s8, s7 +; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s6 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 ; GFX6-NEXT: s_and_b32 s5, 1, s10 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9] @@ -6900,23 +6888,22 @@ define amdgpu_ps <4 x float> @v_fshl_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; ; GFX8-LABEL: v_fshl_i128_vss: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_mov_b64 s[6:7], 0x7f -; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] -; GFX8-NEXT: s_sub_i32 s5, s8, 64 -; GFX8-NEXT: s_sub_i32 s6, 64, s8 -; GFX8-NEXT: s_cmp_lt_u32 s8, 64 +; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX8-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] +; GFX8-NEXT: s_sub_i32 s5, s6, 64 +; GFX8-NEXT: s_sub_i32 s7, 64, s6 +; GFX8-NEXT: s_cmp_lt_u32 s6, 64 ; GFX8-NEXT: s_cselect_b32 s9, 1, 0 -; GFX8-NEXT: s_cmp_eq_u32 s8, 0 +; GFX8-NEXT: s_cmp_eq_u32 s6, 0 +; GFX8-NEXT: s_mov_b32 s8, 0 ; GFX8-NEXT: s_cselect_b32 s10, 1, 0 -; GFX8-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] -; GFX8-NEXT: 
v_lshlrev_b64 v[6:7], s8, v[2:3] -; GFX8-NEXT: v_lshlrev_b64 v[8:9], s8, v[0:1] +; GFX8-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1] +; GFX8-NEXT: v_lshlrev_b64 v[8:9], s6, v[0:1] ; GFX8-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] ; GFX8-NEXT: s_and_b32 s5, 1, s9 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 ; GFX8-NEXT: s_lshl_b32 s9, s2, 31 -; GFX8-NEXT: s_mov_b32 s8, s7 +; GFX8-NEXT: v_lshlrev_b64 v[6:7], s6, v[2:3] ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 ; GFX8-NEXT: s_and_b32 s5, 1, s10 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9] @@ -6955,23 +6942,22 @@ define amdgpu_ps <4 x float> @v_fshl_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; ; GFX9-LABEL: v_fshl_i128_vss: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f -; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] -; GFX9-NEXT: s_sub_i32 s5, s8, 64 -; GFX9-NEXT: s_sub_i32 s6, 64, s8 -; GFX9-NEXT: s_cmp_lt_u32 s8, 64 +; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX9-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] +; GFX9-NEXT: s_sub_i32 s5, s6, 64 +; GFX9-NEXT: s_sub_i32 s7, 64, s6 +; GFX9-NEXT: s_cmp_lt_u32 s6, 64 ; GFX9-NEXT: s_cselect_b32 s9, 1, 0 -; GFX9-NEXT: s_cmp_eq_u32 s8, 0 +; GFX9-NEXT: s_cmp_eq_u32 s6, 0 +; GFX9-NEXT: s_mov_b32 s8, 0 ; GFX9-NEXT: s_cselect_b32 s10, 1, 0 -; GFX9-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] -; GFX9-NEXT: v_lshlrev_b64 v[6:7], s8, v[2:3] -; GFX9-NEXT: v_lshlrev_b64 v[8:9], s8, v[0:1] +; GFX9-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1] +; GFX9-NEXT: v_lshlrev_b64 v[8:9], s6, v[0:1] ; GFX9-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] ; GFX9-NEXT: s_and_b32 s5, 1, s9 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 ; GFX9-NEXT: s_lshl_b32 s9, s2, 31 -; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_lshlrev_b64 v[6:7], s6, v[2:3] ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 ; GFX9-NEXT: s_and_b32 s5, 1, s10 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9] @@ -7010,39 +6996,38 @@ define amdgpu_ps <4 x float> @v_fshl_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; ; GFX10-LABEL: v_fshl_i128_vss: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f -; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] -; GFX10-NEXT: s_sub_i32 s5, s8, 64 -; GFX10-NEXT: s_sub_i32 s6, 64, s8 -; GFX10-NEXT: s_cmp_lt_u32 s8, 64 -; GFX10-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] -; GFX10-NEXT: v_lshlrev_b64 v[6:7], s8, v[2:3] +; GFX10-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX10-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] +; GFX10-NEXT: s_sub_i32 s5, s6, 64 +; GFX10-NEXT: s_sub_i32 s7, 64, s6 +; GFX10-NEXT: s_cmp_lt_u32 s6, 64 +; GFX10-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1] +; GFX10-NEXT: s_cselect_b32 s8, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s6, 0 +; GFX10-NEXT: v_lshlrev_b64 v[6:7], s6, v[2:3] ; GFX10-NEXT: s_cselect_b32 s9, 1, 0 -; GFX10-NEXT: s_cmp_eq_u32 s8, 0 -; GFX10-NEXT: v_lshlrev_b64 v[8:9], s8, v[0:1] -; GFX10-NEXT: s_cselect_b32 s10, 1, 0 -; GFX10-NEXT: s_and_b32 s6, 1, s9 +; GFX10-NEXT: v_lshlrev_b64 v[8:9], s6, v[0:1] +; GFX10-NEXT: s_and_b32 s6, 1, s8 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 -; GFX10-NEXT: s_lshl_b32 s9, s2, 31 -; GFX10-NEXT: s_mov_b32 s8, s7 +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6 +; GFX10-NEXT: s_mov_b32 s6, 0 +; GFX10-NEXT: s_lshl_b32 s7, s2, 31 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] -; GFX10-NEXT: s_and_b32 s5, 1, s10 -; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9] +; GFX10-NEXT: s_and_b32 s5, 1, s9 +; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7] ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 ; GFX10-NEXT: s_sub_i32 s10, s4, 64 ; GFX10-NEXT: 
s_sub_i32 s8, 64, s4 -; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX10-NEXT: s_cmp_lt_u32 s4, 64 +; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo ; GFX10-NEXT: s_cselect_b32 s11, 1, 0 ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 -; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo ; GFX10-NEXT: s_cselect_b32 s12, 1, 0 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s4 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 -; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s5 @@ -7065,40 +7050,38 @@ define amdgpu_ps <4 x float> @v_fshl_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; ; GFX11-LABEL: v_fshl_i128_vss: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_mov_b64 s[6:7], 0x7f -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX11-NEXT: s_and_not1_b64 s[4:5], s[6:7], s[4:5] -; GFX11-NEXT: s_sub_i32 s5, s8, 64 -; GFX11-NEXT: s_sub_i32 s6, 64, s8 -; GFX11-NEXT: s_cmp_lt_u32 s8, 64 -; GFX11-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] -; GFX11-NEXT: v_lshlrev_b64 v[6:7], s8, v[2:3] +; GFX11-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX11-NEXT: s_and_not1_b64 s[4:5], 0x7f, s[4:5] +; GFX11-NEXT: s_sub_i32 s5, s6, 64 +; GFX11-NEXT: s_sub_i32 s7, 64, s6 +; GFX11-NEXT: s_cmp_lt_u32 s6, 64 +; GFX11-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1] +; GFX11-NEXT: s_cselect_b32 s8, 1, 0 +; GFX11-NEXT: s_cmp_eq_u32 s6, 0 +; GFX11-NEXT: v_lshlrev_b64 v[6:7], s6, v[2:3] ; GFX11-NEXT: s_cselect_b32 s9, 1, 0 -; GFX11-NEXT: s_cmp_eq_u32 s8, 0 -; GFX11-NEXT: v_lshlrev_b64 v[8:9], s8, v[0:1] -; GFX11-NEXT: s_cselect_b32 s10, 1, 0 -; GFX11-NEXT: s_and_b32 s6, 1, s9 +; GFX11-NEXT: v_lshlrev_b64 v[8:9], s6, v[0:1] +; GFX11-NEXT: s_and_b32 s6, 1, s8 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 -; GFX11-NEXT: s_lshl_b32 s9, s2, 31 -; GFX11-NEXT: s_mov_b32 s8, s7 +; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6 +; GFX11-NEXT: s_mov_b32 s6, 0 +; GFX11-NEXT: s_lshl_b32 s7, s2, 31 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] -; GFX11-NEXT: s_and_b32 s5, 1, s10 -; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9] +; GFX11-NEXT: s_and_b32 s5, 1, s9 +; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7] ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 ; GFX11-NEXT: s_sub_i32 s10, s4, 64 ; GFX11-NEXT: s_sub_i32 s8, 64, s4 -; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX11-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX11-NEXT: s_cmp_lt_u32 s4, 64 +; GFX11-NEXT: v_dual_cndmask_b32 v6, 0, v8 :: v_dual_cndmask_b32 v7, 0, v9 ; GFX11-NEXT: s_cselect_b32 s11, 1, 0 ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 -; GFX11-NEXT: v_dual_cndmask_b32 v6, 0, v8 :: v_dual_cndmask_b32 v7, 0, v9 +; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 ; GFX11-NEXT: s_cselect_b32 s12, 1, 0 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[0:1], s4 ; GFX11-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 -; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s5 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[2:3], s4 ; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] @@ -7243,71 +7226,69 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) { define amdgpu_ps <2 x i128> @s_fshl_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) { ; GFX6-LABEL: s_fshl_v2i128: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_mov_b64 s[18:19], 0x7f -; 
GFX6-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] -; GFX6-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] -; GFX6-NEXT: s_sub_i32 s17, s22, 64 -; GFX6-NEXT: s_sub_i32 s23, 64, s22 -; GFX6-NEXT: s_cmp_lt_u32 s22, 64 +; GFX6-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f +; GFX6-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17] +; GFX6-NEXT: s_sub_i32 s17, s18, 64 +; GFX6-NEXT: s_sub_i32 s19, 64, s18 +; GFX6-NEXT: s_cmp_lt_u32 s18, 64 +; GFX6-NEXT: s_cselect_b32 s23, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s18, 0 ; GFX6-NEXT: s_cselect_b32 s28, 1, 0 -; GFX6-NEXT: s_cmp_eq_u32 s22, 0 -; GFX6-NEXT: s_cselect_b32 s29, 1, 0 -; GFX6-NEXT: s_lshl_b64 s[24:25], s[0:1], s22 -; GFX6-NEXT: s_lshr_b64 s[26:27], s[0:1], s23 -; GFX6-NEXT: s_lshl_b64 s[22:23], s[2:3], s22 -; GFX6-NEXT: s_or_b64 s[22:23], s[26:27], s[22:23] +; GFX6-NEXT: s_lshl_b64 s[24:25], s[0:1], s18 +; GFX6-NEXT: s_lshr_b64 s[26:27], s[0:1], s19 +; GFX6-NEXT: s_lshl_b64 s[18:19], s[2:3], s18 +; GFX6-NEXT: s_or_b64 s[18:19], s[26:27], s[18:19] ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s17 -; GFX6-NEXT: s_cmp_lg_u32 s28, 0 +; GFX6-NEXT: s_cmp_lg_u32 s23, 0 ; GFX6-NEXT: s_cselect_b64 s[24:25], s[24:25], 0 -; GFX6-NEXT: s_cselect_b64 s[0:1], s[22:23], s[0:1] -; GFX6-NEXT: s_cmp_lg_u32 s29, 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[18:19], s[0:1] +; GFX6-NEXT: s_cmp_lg_u32 s28, 0 +; GFX6-NEXT: s_mov_b32 s22, 0 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX6-NEXT: s_lshr_b64 s[0:1], s[8:9], 1 -; GFX6-NEXT: s_lshl_b32 s9, s10, 31 -; GFX6-NEXT: s_mov_b32 s8, s19 -; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9] +; GFX6-NEXT: s_lshl_b32 s23, s10, 31 +; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[22:23] ; GFX6-NEXT: s_lshr_b64 s[8:9], s[10:11], 1 -; GFX6-NEXT: s_sub_i32 s26, s16, 64 -; GFX6-NEXT: s_sub_i32 s22, 64, s16 +; GFX6-NEXT: s_sub_i32 s23, s16, 64 +; GFX6-NEXT: s_sub_i32 s18, 64, s16 ; GFX6-NEXT: s_cmp_lt_u32 s16, 64 -; GFX6-NEXT: s_cselect_b32 s27, 1, 0 +; GFX6-NEXT: s_cselect_b32 s26, 1, 0 ; GFX6-NEXT: s_cmp_eq_u32 s16, 0 -; GFX6-NEXT: s_cselect_b32 s28, 1, 0 +; GFX6-NEXT: s_cselect_b32 s27, 1, 0 ; GFX6-NEXT: s_lshr_b64 s[10:11], s[8:9], s16 ; GFX6-NEXT: s_lshr_b64 s[16:17], s[0:1], s16 -; GFX6-NEXT: s_lshl_b64 s[22:23], s[8:9], s22 -; GFX6-NEXT: s_or_b64 s[16:17], s[16:17], s[22:23] -; GFX6-NEXT: s_lshr_b64 s[8:9], s[8:9], s26 -; GFX6-NEXT: s_cmp_lg_u32 s27, 0 +; GFX6-NEXT: s_lshl_b64 s[18:19], s[8:9], s18 +; GFX6-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] +; GFX6-NEXT: s_lshr_b64 s[8:9], s[8:9], s23 +; GFX6-NEXT: s_cmp_lg_u32 s26, 0 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[16:17], s[8:9] -; GFX6-NEXT: s_cmp_lg_u32 s28, 0 -; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] ; GFX6-NEXT: s_cmp_lg_u32 s27, 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] +; GFX6-NEXT: s_cmp_lg_u32 s26, 0 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[10:11], 0 ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] -; GFX6-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] -; GFX6-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] +; GFX6-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f +; GFX6-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21] ; GFX6-NEXT: s_or_b64 s[0:1], s[24:25], s[0:1] ; GFX6-NEXT: s_sub_i32 s11, s8, 64 ; GFX6-NEXT: s_sub_i32 s9, 64, s8 ; GFX6-NEXT: s_cmp_lt_u32 s8, 64 -; GFX6-NEXT: s_cselect_b32 s18, 1, 0 +; GFX6-NEXT: s_cselect_b32 s20, 1, 0 ; GFX6-NEXT: s_cmp_eq_u32 s8, 0 -; GFX6-NEXT: s_cselect_b32 s22, 1, 0 +; GFX6-NEXT: s_cselect_b32 s21, 1, 0 ; GFX6-NEXT: s_lshl_b64 s[16:17], s[4:5], s8 -; GFX6-NEXT: s_lshr_b64 s[20:21], s[4:5], s9 +; GFX6-NEXT: s_lshr_b64 s[18:19], s[4:5], s9 ; GFX6-NEXT: s_lshl_b64 
s[8:9], s[6:7], s8 -; GFX6-NEXT: s_or_b64 s[8:9], s[20:21], s[8:9] +; GFX6-NEXT: s_or_b64 s[8:9], s[18:19], s[8:9] ; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], s11 -; GFX6-NEXT: s_cmp_lg_u32 s18, 0 +; GFX6-NEXT: s_cmp_lg_u32 s20, 0 ; GFX6-NEXT: s_cselect_b64 s[16:17], s[16:17], 0 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] -; GFX6-NEXT: s_cmp_lg_u32 s22, 0 +; GFX6-NEXT: s_cmp_lg_u32 s21, 0 ; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] ; GFX6-NEXT: s_lshr_b64 s[4:5], s[12:13], 1 -; GFX6-NEXT: s_lshl_b32 s9, s14, 31 -; GFX6-NEXT: s_mov_b32 s8, s19 -; GFX6-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; GFX6-NEXT: s_lshl_b32 s23, s14, 31 +; GFX6-NEXT: s_or_b64 s[4:5], s[4:5], s[22:23] ; GFX6-NEXT: s_lshr_b64 s[8:9], s[14:15], 1 ; GFX6-NEXT: s_sub_i32 s18, s10, 64 ; GFX6-NEXT: s_sub_i32 s14, 64, s10 @@ -7332,71 +7313,69 @@ define amdgpu_ps <2 x i128> @s_fshl_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX8-LABEL: s_fshl_v2i128: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_mov_b64 s[18:19], 0x7f -; GFX8-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] -; GFX8-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] -; GFX8-NEXT: s_sub_i32 s17, s22, 64 -; GFX8-NEXT: s_sub_i32 s23, 64, s22 -; GFX8-NEXT: s_cmp_lt_u32 s22, 64 +; GFX8-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f +; GFX8-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17] +; GFX8-NEXT: s_sub_i32 s17, s18, 64 +; GFX8-NEXT: s_sub_i32 s19, 64, s18 +; GFX8-NEXT: s_cmp_lt_u32 s18, 64 +; GFX8-NEXT: s_cselect_b32 s23, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s18, 0 ; GFX8-NEXT: s_cselect_b32 s28, 1, 0 -; GFX8-NEXT: s_cmp_eq_u32 s22, 0 -; GFX8-NEXT: s_cselect_b32 s29, 1, 0 -; GFX8-NEXT: s_lshl_b64 s[24:25], s[0:1], s22 -; GFX8-NEXT: s_lshr_b64 s[26:27], s[0:1], s23 -; GFX8-NEXT: s_lshl_b64 s[22:23], s[2:3], s22 -; GFX8-NEXT: s_or_b64 s[22:23], s[26:27], s[22:23] +; GFX8-NEXT: s_lshl_b64 s[24:25], s[0:1], s18 +; GFX8-NEXT: s_lshr_b64 s[26:27], s[0:1], s19 +; GFX8-NEXT: s_lshl_b64 s[18:19], s[2:3], s18 +; GFX8-NEXT: s_or_b64 s[18:19], s[26:27], s[18:19] ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s17 -; GFX8-NEXT: s_cmp_lg_u32 s28, 0 +; GFX8-NEXT: s_cmp_lg_u32 s23, 0 ; GFX8-NEXT: s_cselect_b64 s[24:25], s[24:25], 0 -; GFX8-NEXT: s_cselect_b64 s[0:1], s[22:23], s[0:1] -; GFX8-NEXT: s_cmp_lg_u32 s29, 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[18:19], s[0:1] +; GFX8-NEXT: s_cmp_lg_u32 s28, 0 +; GFX8-NEXT: s_mov_b32 s22, 0 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX8-NEXT: s_lshr_b64 s[0:1], s[8:9], 1 -; GFX8-NEXT: s_lshl_b32 s9, s10, 31 -; GFX8-NEXT: s_mov_b32 s8, s19 -; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9] +; GFX8-NEXT: s_lshl_b32 s23, s10, 31 +; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[22:23] ; GFX8-NEXT: s_lshr_b64 s[8:9], s[10:11], 1 -; GFX8-NEXT: s_sub_i32 s26, s16, 64 -; GFX8-NEXT: s_sub_i32 s22, 64, s16 +; GFX8-NEXT: s_sub_i32 s23, s16, 64 +; GFX8-NEXT: s_sub_i32 s18, 64, s16 ; GFX8-NEXT: s_cmp_lt_u32 s16, 64 -; GFX8-NEXT: s_cselect_b32 s27, 1, 0 +; GFX8-NEXT: s_cselect_b32 s26, 1, 0 ; GFX8-NEXT: s_cmp_eq_u32 s16, 0 -; GFX8-NEXT: s_cselect_b32 s28, 1, 0 +; GFX8-NEXT: s_cselect_b32 s27, 1, 0 ; GFX8-NEXT: s_lshr_b64 s[10:11], s[8:9], s16 ; GFX8-NEXT: s_lshr_b64 s[16:17], s[0:1], s16 -; GFX8-NEXT: s_lshl_b64 s[22:23], s[8:9], s22 -; GFX8-NEXT: s_or_b64 s[16:17], s[16:17], s[22:23] -; GFX8-NEXT: s_lshr_b64 s[8:9], s[8:9], s26 -; GFX8-NEXT: s_cmp_lg_u32 s27, 0 +; GFX8-NEXT: s_lshl_b64 s[18:19], s[8:9], s18 +; GFX8-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] +; GFX8-NEXT: s_lshr_b64 s[8:9], s[8:9], s23 +; GFX8-NEXT: s_cmp_lg_u32 s26, 0 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[16:17], 
s[8:9] -; GFX8-NEXT: s_cmp_lg_u32 s28, 0 -; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] ; GFX8-NEXT: s_cmp_lg_u32 s27, 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] +; GFX8-NEXT: s_cmp_lg_u32 s26, 0 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[10:11], 0 ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] -; GFX8-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] -; GFX8-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] +; GFX8-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f +; GFX8-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21] ; GFX8-NEXT: s_or_b64 s[0:1], s[24:25], s[0:1] ; GFX8-NEXT: s_sub_i32 s11, s8, 64 ; GFX8-NEXT: s_sub_i32 s9, 64, s8 ; GFX8-NEXT: s_cmp_lt_u32 s8, 64 -; GFX8-NEXT: s_cselect_b32 s18, 1, 0 +; GFX8-NEXT: s_cselect_b32 s20, 1, 0 ; GFX8-NEXT: s_cmp_eq_u32 s8, 0 -; GFX8-NEXT: s_cselect_b32 s22, 1, 0 +; GFX8-NEXT: s_cselect_b32 s21, 1, 0 ; GFX8-NEXT: s_lshl_b64 s[16:17], s[4:5], s8 -; GFX8-NEXT: s_lshr_b64 s[20:21], s[4:5], s9 +; GFX8-NEXT: s_lshr_b64 s[18:19], s[4:5], s9 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[6:7], s8 -; GFX8-NEXT: s_or_b64 s[8:9], s[20:21], s[8:9] +; GFX8-NEXT: s_or_b64 s[8:9], s[18:19], s[8:9] ; GFX8-NEXT: s_lshl_b64 s[4:5], s[4:5], s11 -; GFX8-NEXT: s_cmp_lg_u32 s18, 0 +; GFX8-NEXT: s_cmp_lg_u32 s20, 0 ; GFX8-NEXT: s_cselect_b64 s[16:17], s[16:17], 0 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] -; GFX8-NEXT: s_cmp_lg_u32 s22, 0 +; GFX8-NEXT: s_cmp_lg_u32 s21, 0 ; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] ; GFX8-NEXT: s_lshr_b64 s[4:5], s[12:13], 1 -; GFX8-NEXT: s_lshl_b32 s9, s14, 31 -; GFX8-NEXT: s_mov_b32 s8, s19 -; GFX8-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; GFX8-NEXT: s_lshl_b32 s23, s14, 31 +; GFX8-NEXT: s_or_b64 s[4:5], s[4:5], s[22:23] ; GFX8-NEXT: s_lshr_b64 s[8:9], s[14:15], 1 ; GFX8-NEXT: s_sub_i32 s18, s10, 64 ; GFX8-NEXT: s_sub_i32 s14, 64, s10 @@ -7421,71 +7400,69 @@ define amdgpu_ps <2 x i128> @s_fshl_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX9-LABEL: s_fshl_v2i128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b64 s[18:19], 0x7f -; GFX9-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] -; GFX9-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] -; GFX9-NEXT: s_sub_i32 s17, s22, 64 -; GFX9-NEXT: s_sub_i32 s23, 64, s22 -; GFX9-NEXT: s_cmp_lt_u32 s22, 64 +; GFX9-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f +; GFX9-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17] +; GFX9-NEXT: s_sub_i32 s17, s18, 64 +; GFX9-NEXT: s_sub_i32 s19, 64, s18 +; GFX9-NEXT: s_cmp_lt_u32 s18, 64 +; GFX9-NEXT: s_cselect_b32 s23, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s18, 0 ; GFX9-NEXT: s_cselect_b32 s28, 1, 0 -; GFX9-NEXT: s_cmp_eq_u32 s22, 0 -; GFX9-NEXT: s_cselect_b32 s29, 1, 0 -; GFX9-NEXT: s_lshl_b64 s[24:25], s[0:1], s22 -; GFX9-NEXT: s_lshr_b64 s[26:27], s[0:1], s23 -; GFX9-NEXT: s_lshl_b64 s[22:23], s[2:3], s22 -; GFX9-NEXT: s_or_b64 s[22:23], s[26:27], s[22:23] +; GFX9-NEXT: s_lshl_b64 s[24:25], s[0:1], s18 +; GFX9-NEXT: s_lshr_b64 s[26:27], s[0:1], s19 +; GFX9-NEXT: s_lshl_b64 s[18:19], s[2:3], s18 +; GFX9-NEXT: s_or_b64 s[18:19], s[26:27], s[18:19] ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s17 -; GFX9-NEXT: s_cmp_lg_u32 s28, 0 +; GFX9-NEXT: s_cmp_lg_u32 s23, 0 ; GFX9-NEXT: s_cselect_b64 s[24:25], s[24:25], 0 -; GFX9-NEXT: s_cselect_b64 s[0:1], s[22:23], s[0:1] -; GFX9-NEXT: s_cmp_lg_u32 s29, 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[18:19], s[0:1] +; GFX9-NEXT: s_cmp_lg_u32 s28, 0 +; GFX9-NEXT: s_mov_b32 s22, 0 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX9-NEXT: s_lshr_b64 s[0:1], s[8:9], 1 -; GFX9-NEXT: s_lshl_b32 s9, s10, 31 -; GFX9-NEXT: s_mov_b32 s8, s19 -; GFX9-NEXT: s_or_b64 s[0:1], 
s[0:1], s[8:9] +; GFX9-NEXT: s_lshl_b32 s23, s10, 31 +; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[22:23] ; GFX9-NEXT: s_lshr_b64 s[8:9], s[10:11], 1 -; GFX9-NEXT: s_sub_i32 s26, s16, 64 -; GFX9-NEXT: s_sub_i32 s22, 64, s16 +; GFX9-NEXT: s_sub_i32 s23, s16, 64 +; GFX9-NEXT: s_sub_i32 s18, 64, s16 ; GFX9-NEXT: s_cmp_lt_u32 s16, 64 -; GFX9-NEXT: s_cselect_b32 s27, 1, 0 +; GFX9-NEXT: s_cselect_b32 s26, 1, 0 ; GFX9-NEXT: s_cmp_eq_u32 s16, 0 -; GFX9-NEXT: s_cselect_b32 s28, 1, 0 +; GFX9-NEXT: s_cselect_b32 s27, 1, 0 ; GFX9-NEXT: s_lshr_b64 s[10:11], s[8:9], s16 ; GFX9-NEXT: s_lshr_b64 s[16:17], s[0:1], s16 -; GFX9-NEXT: s_lshl_b64 s[22:23], s[8:9], s22 -; GFX9-NEXT: s_or_b64 s[16:17], s[16:17], s[22:23] -; GFX9-NEXT: s_lshr_b64 s[8:9], s[8:9], s26 -; GFX9-NEXT: s_cmp_lg_u32 s27, 0 +; GFX9-NEXT: s_lshl_b64 s[18:19], s[8:9], s18 +; GFX9-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] +; GFX9-NEXT: s_lshr_b64 s[8:9], s[8:9], s23 +; GFX9-NEXT: s_cmp_lg_u32 s26, 0 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[16:17], s[8:9] -; GFX9-NEXT: s_cmp_lg_u32 s28, 0 -; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] ; GFX9-NEXT: s_cmp_lg_u32 s27, 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] +; GFX9-NEXT: s_cmp_lg_u32 s26, 0 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[10:11], 0 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] -; GFX9-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] -; GFX9-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] +; GFX9-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f +; GFX9-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21] ; GFX9-NEXT: s_or_b64 s[0:1], s[24:25], s[0:1] ; GFX9-NEXT: s_sub_i32 s11, s8, 64 ; GFX9-NEXT: s_sub_i32 s9, 64, s8 ; GFX9-NEXT: s_cmp_lt_u32 s8, 64 -; GFX9-NEXT: s_cselect_b32 s18, 1, 0 +; GFX9-NEXT: s_cselect_b32 s20, 1, 0 ; GFX9-NEXT: s_cmp_eq_u32 s8, 0 -; GFX9-NEXT: s_cselect_b32 s22, 1, 0 +; GFX9-NEXT: s_cselect_b32 s21, 1, 0 ; GFX9-NEXT: s_lshl_b64 s[16:17], s[4:5], s8 -; GFX9-NEXT: s_lshr_b64 s[20:21], s[4:5], s9 +; GFX9-NEXT: s_lshr_b64 s[18:19], s[4:5], s9 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[6:7], s8 -; GFX9-NEXT: s_or_b64 s[8:9], s[20:21], s[8:9] +; GFX9-NEXT: s_or_b64 s[8:9], s[18:19], s[8:9] ; GFX9-NEXT: s_lshl_b64 s[4:5], s[4:5], s11 -; GFX9-NEXT: s_cmp_lg_u32 s18, 0 +; GFX9-NEXT: s_cmp_lg_u32 s20, 0 ; GFX9-NEXT: s_cselect_b64 s[16:17], s[16:17], 0 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] -; GFX9-NEXT: s_cmp_lg_u32 s22, 0 +; GFX9-NEXT: s_cmp_lg_u32 s21, 0 ; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] ; GFX9-NEXT: s_lshr_b64 s[4:5], s[12:13], 1 -; GFX9-NEXT: s_lshl_b32 s9, s14, 31 -; GFX9-NEXT: s_mov_b32 s8, s19 -; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; GFX9-NEXT: s_lshl_b32 s23, s14, 31 +; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], s[22:23] ; GFX9-NEXT: s_lshr_b64 s[8:9], s[14:15], 1 ; GFX9-NEXT: s_sub_i32 s18, s10, 64 ; GFX9-NEXT: s_sub_i32 s14, 64, s10 @@ -7510,73 +7487,71 @@ define amdgpu_ps <2 x i128> @s_fshl_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX10-LABEL: s_fshl_v2i128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mov_b64 s[18:19], 0x7f -; GFX10-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] -; GFX10-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] -; GFX10-NEXT: s_sub_i32 s17, s22, 64 -; GFX10-NEXT: s_sub_i32 s23, 64, s22 -; GFX10-NEXT: s_cmp_lt_u32 s22, 64 +; GFX10-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f +; GFX10-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17] +; GFX10-NEXT: s_sub_i32 s17, s18, 64 +; GFX10-NEXT: s_sub_i32 s19, 64, s18 +; GFX10-NEXT: s_cmp_lt_u32 s18, 64 +; GFX10-NEXT: s_mov_b32 s22, 0 +; GFX10-NEXT: s_cselect_b32 s23, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s18, 0 ; 
GFX10-NEXT: s_cselect_b32 s28, 1, 0 -; GFX10-NEXT: s_cmp_eq_u32 s22, 0 -; GFX10-NEXT: s_cselect_b32 s29, 1, 0 -; GFX10-NEXT: s_lshr_b64 s[24:25], s[0:1], s23 -; GFX10-NEXT: s_lshl_b64 s[26:27], s[2:3], s22 -; GFX10-NEXT: s_lshl_b64 s[22:23], s[0:1], s22 +; GFX10-NEXT: s_lshr_b64 s[24:25], s[0:1], s19 +; GFX10-NEXT: s_lshl_b64 s[26:27], s[2:3], s18 +; GFX10-NEXT: s_lshl_b64 s[18:19], s[0:1], s18 ; GFX10-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s17 -; GFX10-NEXT: s_cmp_lg_u32 s28, 0 -; GFX10-NEXT: s_cselect_b64 s[22:23], s[22:23], 0 +; GFX10-NEXT: s_cmp_lg_u32 s23, 0 +; GFX10-NEXT: s_cselect_b64 s[18:19], s[18:19], 0 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1] -; GFX10-NEXT: s_cmp_lg_u32 s29, 0 +; GFX10-NEXT: s_cmp_lg_u32 s28, 0 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX10-NEXT: s_lshr_b64 s[0:1], s[8:9], 1 -; GFX10-NEXT: s_lshl_b32 s9, s10, 31 -; GFX10-NEXT: s_mov_b32 s8, s19 -; GFX10-NEXT: s_sub_i32 s26, s16, 64 -; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9] +; GFX10-NEXT: s_lshl_b32 s23, s10, 31 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[10:11], 1 +; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[22:23] +; GFX10-NEXT: s_sub_i32 s23, s16, 64 ; GFX10-NEXT: s_sub_i32 s17, 64, s16 ; GFX10-NEXT: s_cmp_lt_u32 s16, 64 -; GFX10-NEXT: s_cselect_b32 s27, 1, 0 +; GFX10-NEXT: s_cselect_b32 s26, 1, 0 ; GFX10-NEXT: s_cmp_eq_u32 s16, 0 -; GFX10-NEXT: s_cselect_b32 s28, 1, 0 +; GFX10-NEXT: s_cselect_b32 s27, 1, 0 ; GFX10-NEXT: s_lshr_b64 s[10:11], s[0:1], s16 ; GFX10-NEXT: s_lshl_b64 s[24:25], s[8:9], s17 ; GFX10-NEXT: s_lshr_b64 s[16:17], s[8:9], s16 ; GFX10-NEXT: s_or_b64 s[10:11], s[10:11], s[24:25] -; GFX10-NEXT: s_lshr_b64 s[8:9], s[8:9], s26 -; GFX10-NEXT: s_cmp_lg_u32 s27, 0 +; GFX10-NEXT: s_lshr_b64 s[8:9], s[8:9], s23 +; GFX10-NEXT: s_cmp_lg_u32 s26, 0 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9] -; GFX10-NEXT: s_cmp_lg_u32 s28, 0 -; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] ; GFX10-NEXT: s_cmp_lg_u32 s27, 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] +; GFX10-NEXT: s_cmp_lg_u32 s26, 0 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[16:17], 0 -; GFX10-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] +; GFX10-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21] ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] -; GFX10-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] -; GFX10-NEXT: s_or_b64 s[0:1], s[22:23], s[0:1] +; GFX10-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f +; GFX10-NEXT: s_or_b64 s[0:1], s[18:19], s[0:1] ; GFX10-NEXT: s_sub_i32 s11, s8, 64 ; GFX10-NEXT: s_sub_i32 s9, 64, s8 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64 -; GFX10-NEXT: s_cselect_b32 s18, 1, 0 +; GFX10-NEXT: s_cselect_b32 s20, 1, 0 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0 -; GFX10-NEXT: s_cselect_b32 s22, 1, 0 +; GFX10-NEXT: s_cselect_b32 s21, 1, 0 ; GFX10-NEXT: s_lshr_b64 s[16:17], s[4:5], s9 -; GFX10-NEXT: s_lshl_b64 s[20:21], s[6:7], s8 +; GFX10-NEXT: s_lshl_b64 s[18:19], s[6:7], s8 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[4:5], s8 -; GFX10-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21] +; GFX10-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] ; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], s11 -; GFX10-NEXT: s_cmp_lg_u32 s18, 0 +; GFX10-NEXT: s_cmp_lg_u32 s20, 0 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5] -; GFX10-NEXT: s_cmp_lg_u32 s22, 0 +; GFX10-NEXT: s_cmp_lg_u32 s21, 0 ; GFX10-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] ; GFX10-NEXT: s_lshr_b64 s[4:5], s[12:13], 1 -; GFX10-NEXT: s_lshl_b32 s13, s14, 31 -; GFX10-NEXT: s_mov_b32 s12, s19 -; GFX10-NEXT: s_sub_i32 
s18, s10, 64 -; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13] +; GFX10-NEXT: s_lshl_b32 s23, s14, 31 ; GFX10-NEXT: s_lshr_b64 s[12:13], s[14:15], 1 +; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[22:23] +; GFX10-NEXT: s_sub_i32 s18, s10, 64 ; GFX10-NEXT: s_sub_i32 s11, 64, s10 ; GFX10-NEXT: s_cmp_lt_u32 s10, 64 ; GFX10-NEXT: s_cselect_b32 s19, 1, 0 @@ -7599,74 +7574,71 @@ define amdgpu_ps <2 x i128> @s_fshl_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX11-LABEL: s_fshl_v2i128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_mov_b64 s[18:19], 0x7f -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] -; GFX11-NEXT: s_and_not1_b64 s[16:17], s[18:19], s[16:17] -; GFX11-NEXT: s_sub_i32 s17, s22, 64 -; GFX11-NEXT: s_sub_i32 s23, 64, s22 -; GFX11-NEXT: s_cmp_lt_u32 s22, 64 +; GFX11-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f +; GFX11-NEXT: s_and_not1_b64 s[16:17], 0x7f, s[16:17] +; GFX11-NEXT: s_sub_i32 s17, s18, 64 +; GFX11-NEXT: s_sub_i32 s19, 64, s18 +; GFX11-NEXT: s_cmp_lt_u32 s18, 64 +; GFX11-NEXT: s_mov_b32 s22, 0 +; GFX11-NEXT: s_cselect_b32 s23, 1, 0 +; GFX11-NEXT: s_cmp_eq_u32 s18, 0 ; GFX11-NEXT: s_cselect_b32 s28, 1, 0 -; GFX11-NEXT: s_cmp_eq_u32 s22, 0 -; GFX11-NEXT: s_cselect_b32 s29, 1, 0 -; GFX11-NEXT: s_lshr_b64 s[24:25], s[0:1], s23 -; GFX11-NEXT: s_lshl_b64 s[26:27], s[2:3], s22 -; GFX11-NEXT: s_lshl_b64 s[22:23], s[0:1], s22 +; GFX11-NEXT: s_lshr_b64 s[24:25], s[0:1], s19 +; GFX11-NEXT: s_lshl_b64 s[26:27], s[2:3], s18 +; GFX11-NEXT: s_lshl_b64 s[18:19], s[0:1], s18 ; GFX11-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s17 -; GFX11-NEXT: s_cmp_lg_u32 s28, 0 -; GFX11-NEXT: s_cselect_b64 s[22:23], s[22:23], 0 +; GFX11-NEXT: s_cmp_lg_u32 s23, 0 +; GFX11-NEXT: s_cselect_b64 s[18:19], s[18:19], 0 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1] -; GFX11-NEXT: s_cmp_lg_u32 s29, 0 +; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX11-NEXT: s_lshr_b64 s[0:1], s[8:9], 1 -; GFX11-NEXT: s_lshl_b32 s9, s10, 31 -; GFX11-NEXT: s_mov_b32 s8, s19 -; GFX11-NEXT: s_sub_i32 s26, s16, 64 -; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9] +; GFX11-NEXT: s_lshl_b32 s23, s10, 31 ; GFX11-NEXT: s_lshr_b64 s[8:9], s[10:11], 1 +; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[22:23] +; GFX11-NEXT: s_sub_i32 s23, s16, 64 ; GFX11-NEXT: s_sub_i32 s17, 64, s16 ; GFX11-NEXT: s_cmp_lt_u32 s16, 64 -; GFX11-NEXT: s_cselect_b32 s27, 1, 0 +; GFX11-NEXT: s_cselect_b32 s26, 1, 0 ; GFX11-NEXT: s_cmp_eq_u32 s16, 0 -; GFX11-NEXT: s_cselect_b32 s28, 1, 0 +; GFX11-NEXT: s_cselect_b32 s27, 1, 0 ; GFX11-NEXT: s_lshr_b64 s[10:11], s[0:1], s16 ; GFX11-NEXT: s_lshl_b64 s[24:25], s[8:9], s17 ; GFX11-NEXT: s_lshr_b64 s[16:17], s[8:9], s16 ; GFX11-NEXT: s_or_b64 s[10:11], s[10:11], s[24:25] -; GFX11-NEXT: s_lshr_b64 s[8:9], s[8:9], s26 -; GFX11-NEXT: s_cmp_lg_u32 s27, 0 +; GFX11-NEXT: s_lshr_b64 s[8:9], s[8:9], s23 +; GFX11-NEXT: s_cmp_lg_u32 s26, 0 ; GFX11-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9] -; GFX11-NEXT: s_cmp_lg_u32 s28, 0 -; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] ; GFX11-NEXT: s_cmp_lg_u32 s27, 0 +; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] +; GFX11-NEXT: s_cmp_lg_u32 s26, 0 ; GFX11-NEXT: s_cselect_b64 s[8:9], s[16:17], 0 -; GFX11-NEXT: s_and_not1_b64 s[10:11], s[18:19], s[20:21] +; GFX11-NEXT: s_and_not1_b64 s[10:11], 0x7f, s[20:21] ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] -; GFX11-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] -; GFX11-NEXT: s_or_b64 s[0:1], s[22:23], s[0:1] +; GFX11-NEXT: 
s_and_b64 s[8:9], s[20:21], 0x7f +; GFX11-NEXT: s_or_b64 s[0:1], s[18:19], s[0:1] ; GFX11-NEXT: s_sub_i32 s11, s8, 64 ; GFX11-NEXT: s_sub_i32 s9, 64, s8 ; GFX11-NEXT: s_cmp_lt_u32 s8, 64 -; GFX11-NEXT: s_cselect_b32 s18, 1, 0 +; GFX11-NEXT: s_cselect_b32 s20, 1, 0 ; GFX11-NEXT: s_cmp_eq_u32 s8, 0 -; GFX11-NEXT: s_cselect_b32 s22, 1, 0 +; GFX11-NEXT: s_cselect_b32 s21, 1, 0 ; GFX11-NEXT: s_lshr_b64 s[16:17], s[4:5], s9 -; GFX11-NEXT: s_lshl_b64 s[20:21], s[6:7], s8 +; GFX11-NEXT: s_lshl_b64 s[18:19], s[6:7], s8 ; GFX11-NEXT: s_lshl_b64 s[8:9], s[4:5], s8 -; GFX11-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21] +; GFX11-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] ; GFX11-NEXT: s_lshl_b64 s[4:5], s[4:5], s11 -; GFX11-NEXT: s_cmp_lg_u32 s18, 0 +; GFX11-NEXT: s_cmp_lg_u32 s20, 0 ; GFX11-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5] -; GFX11-NEXT: s_cmp_lg_u32 s22, 0 +; GFX11-NEXT: s_cmp_lg_u32 s21, 0 ; GFX11-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] ; GFX11-NEXT: s_lshr_b64 s[4:5], s[12:13], 1 -; GFX11-NEXT: s_lshl_b32 s13, s14, 31 -; GFX11-NEXT: s_mov_b32 s12, s19 -; GFX11-NEXT: s_sub_i32 s18, s10, 64 -; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13] +; GFX11-NEXT: s_lshl_b32 s23, s14, 31 ; GFX11-NEXT: s_lshr_b64 s[12:13], s[14:15], 1 +; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[22:23] +; GFX11-NEXT: s_sub_i32 s18, s10, 64 ; GFX11-NEXT: s_sub_i32 s11, 64, s10 ; GFX11-NEXT: s_cmp_lt_u32 s10, 64 ; GFX11-NEXT: s_cselect_b32 s19, 1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll index 25d845f2f9922a..88fa7a8406475f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -5878,39 +5878,38 @@ define <2 x i64> @v_fshr_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) { define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) { ; GFX6-LABEL: s_fshr_i128: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s10, 0x7f -; GFX6-NEXT: s_mov_b32 s11, 0 -; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] -; GFX6-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] +; GFX6-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f +; GFX6-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9] +; GFX6-NEXT: s_lshl_b64 s[12:13], s[0:1], 1 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX6-NEXT: s_lshr_b32 s10, s1, 31 -; GFX6-NEXT: s_lshl_b64 s[14:15], s[0:1], 1 -; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[10:11] -; GFX6-NEXT: s_sub_i32 s13, s8, 64 +; GFX6-NEXT: s_lshr_b32 s0, s1, 31 +; GFX6-NEXT: s_mov_b32 s1, 0 +; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GFX6-NEXT: s_sub_i32 s11, s8, 64 ; GFX6-NEXT: s_sub_i32 s9, 64, s8 ; GFX6-NEXT: s_cmp_lt_u32 s8, 64 ; GFX6-NEXT: s_cselect_b32 s16, 1, 0 ; GFX6-NEXT: s_cmp_eq_u32 s8, 0 ; GFX6-NEXT: s_cselect_b32 s17, 1, 0 -; GFX6-NEXT: s_lshl_b64 s[2:3], s[14:15], s8 -; GFX6-NEXT: s_lshr_b64 s[10:11], s[14:15], s9 +; GFX6-NEXT: s_lshl_b64 s[2:3], s[12:13], s8 +; GFX6-NEXT: s_lshr_b64 s[14:15], s[12:13], s9 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 -; GFX6-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] -; GFX6-NEXT: s_lshl_b64 s[10:11], s[14:15], s13 +; GFX6-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9] +; GFX6-NEXT: s_lshl_b64 s[12:13], s[12:13], s11 ; GFX6-NEXT: s_cmp_lg_u32 s16, 0 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 -; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] +; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[12:13] ; GFX6-NEXT: s_cmp_lg_u32 s17, 0 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] -; GFX6-NEXT: s_sub_i32 s14, s12, 64 -; GFX6-NEXT: 
s_sub_i32 s13, 64, s12 -; GFX6-NEXT: s_cmp_lt_u32 s12, 64 +; GFX6-NEXT: s_sub_i32 s14, s10, 64 +; GFX6-NEXT: s_sub_i32 s12, 64, s10 +; GFX6-NEXT: s_cmp_lt_u32 s10, 64 ; GFX6-NEXT: s_cselect_b32 s15, 1, 0 -; GFX6-NEXT: s_cmp_eq_u32 s12, 0 +; GFX6-NEXT: s_cmp_eq_u32 s10, 0 ; GFX6-NEXT: s_cselect_b32 s16, 1, 0 -; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 -; GFX6-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 -; GFX6-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 +; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], s10 +; GFX6-NEXT: s_lshr_b64 s[10:11], s[4:5], s10 +; GFX6-NEXT: s_lshl_b64 s[12:13], s[6:7], s12 ; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] ; GFX6-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 ; GFX6-NEXT: s_cmp_lg_u32 s15, 0 @@ -5925,39 +5924,38 @@ define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; ; GFX8-LABEL: s_fshr_i128: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_movk_i32 s10, 0x7f -; GFX8-NEXT: s_mov_b32 s11, 0 -; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] -; GFX8-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] +; GFX8-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f +; GFX8-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9] +; GFX8-NEXT: s_lshl_b64 s[12:13], s[0:1], 1 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX8-NEXT: s_lshr_b32 s10, s1, 31 -; GFX8-NEXT: s_lshl_b64 s[14:15], s[0:1], 1 -; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[10:11] -; GFX8-NEXT: s_sub_i32 s13, s8, 64 +; GFX8-NEXT: s_lshr_b32 s0, s1, 31 +; GFX8-NEXT: s_mov_b32 s1, 0 +; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GFX8-NEXT: s_sub_i32 s11, s8, 64 ; GFX8-NEXT: s_sub_i32 s9, 64, s8 ; GFX8-NEXT: s_cmp_lt_u32 s8, 64 ; GFX8-NEXT: s_cselect_b32 s16, 1, 0 ; GFX8-NEXT: s_cmp_eq_u32 s8, 0 ; GFX8-NEXT: s_cselect_b32 s17, 1, 0 -; GFX8-NEXT: s_lshl_b64 s[2:3], s[14:15], s8 -; GFX8-NEXT: s_lshr_b64 s[10:11], s[14:15], s9 +; GFX8-NEXT: s_lshl_b64 s[2:3], s[12:13], s8 +; GFX8-NEXT: s_lshr_b64 s[14:15], s[12:13], s9 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 -; GFX8-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] -; GFX8-NEXT: s_lshl_b64 s[10:11], s[14:15], s13 +; GFX8-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9] +; GFX8-NEXT: s_lshl_b64 s[12:13], s[12:13], s11 ; GFX8-NEXT: s_cmp_lg_u32 s16, 0 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 -; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] +; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[12:13] ; GFX8-NEXT: s_cmp_lg_u32 s17, 0 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] -; GFX8-NEXT: s_sub_i32 s14, s12, 64 -; GFX8-NEXT: s_sub_i32 s13, 64, s12 -; GFX8-NEXT: s_cmp_lt_u32 s12, 64 +; GFX8-NEXT: s_sub_i32 s14, s10, 64 +; GFX8-NEXT: s_sub_i32 s12, 64, s10 +; GFX8-NEXT: s_cmp_lt_u32 s10, 64 ; GFX8-NEXT: s_cselect_b32 s15, 1, 0 -; GFX8-NEXT: s_cmp_eq_u32 s12, 0 +; GFX8-NEXT: s_cmp_eq_u32 s10, 0 ; GFX8-NEXT: s_cselect_b32 s16, 1, 0 -; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 -; GFX8-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 -; GFX8-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 +; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], s10 +; GFX8-NEXT: s_lshr_b64 s[10:11], s[4:5], s10 +; GFX8-NEXT: s_lshl_b64 s[12:13], s[6:7], s12 ; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] ; GFX8-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 ; GFX8-NEXT: s_cmp_lg_u32 s15, 0 @@ -5972,39 +5970,38 @@ define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; ; GFX9-LABEL: s_fshr_i128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_movk_i32 s10, 0x7f -; GFX9-NEXT: s_mov_b32 s11, 0 -; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] -; GFX9-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] +; GFX9-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f +; GFX9-NEXT: s_andn2_b64 
s[8:9], 0x7f, s[8:9] +; GFX9-NEXT: s_lshl_b64 s[12:13], s[0:1], 1 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX9-NEXT: s_lshr_b32 s10, s1, 31 -; GFX9-NEXT: s_lshl_b64 s[14:15], s[0:1], 1 -; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[10:11] -; GFX9-NEXT: s_sub_i32 s13, s8, 64 +; GFX9-NEXT: s_lshr_b32 s0, s1, 31 +; GFX9-NEXT: s_mov_b32 s1, 0 +; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GFX9-NEXT: s_sub_i32 s11, s8, 64 ; GFX9-NEXT: s_sub_i32 s9, 64, s8 ; GFX9-NEXT: s_cmp_lt_u32 s8, 64 ; GFX9-NEXT: s_cselect_b32 s16, 1, 0 ; GFX9-NEXT: s_cmp_eq_u32 s8, 0 ; GFX9-NEXT: s_cselect_b32 s17, 1, 0 -; GFX9-NEXT: s_lshl_b64 s[2:3], s[14:15], s8 -; GFX9-NEXT: s_lshr_b64 s[10:11], s[14:15], s9 +; GFX9-NEXT: s_lshl_b64 s[2:3], s[12:13], s8 +; GFX9-NEXT: s_lshr_b64 s[14:15], s[12:13], s9 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 -; GFX9-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] -; GFX9-NEXT: s_lshl_b64 s[10:11], s[14:15], s13 +; GFX9-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9] +; GFX9-NEXT: s_lshl_b64 s[12:13], s[12:13], s11 ; GFX9-NEXT: s_cmp_lg_u32 s16, 0 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 -; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] +; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[12:13] ; GFX9-NEXT: s_cmp_lg_u32 s17, 0 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] -; GFX9-NEXT: s_sub_i32 s14, s12, 64 -; GFX9-NEXT: s_sub_i32 s13, 64, s12 -; GFX9-NEXT: s_cmp_lt_u32 s12, 64 +; GFX9-NEXT: s_sub_i32 s14, s10, 64 +; GFX9-NEXT: s_sub_i32 s12, 64, s10 +; GFX9-NEXT: s_cmp_lt_u32 s10, 64 ; GFX9-NEXT: s_cselect_b32 s15, 1, 0 -; GFX9-NEXT: s_cmp_eq_u32 s12, 0 +; GFX9-NEXT: s_cmp_eq_u32 s10, 0 ; GFX9-NEXT: s_cselect_b32 s16, 1, 0 -; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 -; GFX9-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 -; GFX9-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 +; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], s10 +; GFX9-NEXT: s_lshr_b64 s[10:11], s[4:5], s10 +; GFX9-NEXT: s_lshl_b64 s[12:13], s[6:7], s12 ; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] ; GFX9-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 ; GFX9-NEXT: s_cmp_lg_u32 s15, 0 @@ -6019,94 +6016,92 @@ define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; ; GFX10-LABEL: s_fshr_i128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_movk_i32 s10, 0x7f -; GFX10-NEXT: s_mov_b32 s11, 0 +; GFX10-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f +; GFX10-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9] ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX10-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] -; GFX10-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] -; GFX10-NEXT: s_lshr_b32 s10, s1, 31 +; GFX10-NEXT: s_lshr_b32 s12, s1, 31 +; GFX10-NEXT: s_mov_b32 s13, 0 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 -; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[10:11] -; GFX10-NEXT: s_sub_i32 s13, s8, 64 +; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[12:13] +; GFX10-NEXT: s_sub_i32 s11, s8, 64 ; GFX10-NEXT: s_sub_i32 s9, 64, s8 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64 ; GFX10-NEXT: s_cselect_b32 s16, 1, 0 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0 ; GFX10-NEXT: s_cselect_b32 s17, 1, 0 -; GFX10-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 +; GFX10-NEXT: s_lshr_b64 s[12:13], s[0:1], s9 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], s8 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 -; GFX10-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15] -; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s13 +; GFX10-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s11 ; GFX10-NEXT: s_cmp_lg_u32 s16, 0 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 -; GFX10-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] +; GFX10-NEXT: 
s_cselect_b64 s[0:1], s[12:13], s[0:1] ; GFX10-NEXT: s_cmp_lg_u32 s17, 0 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] -; GFX10-NEXT: s_sub_i32 s14, s12, 64 -; GFX10-NEXT: s_sub_i32 s10, 64, s12 -; GFX10-NEXT: s_cmp_lt_u32 s12, 64 +; GFX10-NEXT: s_sub_i32 s14, s10, 64 +; GFX10-NEXT: s_sub_i32 s11, 64, s10 +; GFX10-NEXT: s_cmp_lt_u32 s10, 64 ; GFX10-NEXT: s_cselect_b32 s15, 1, 0 -; GFX10-NEXT: s_cmp_eq_u32 s12, 0 +; GFX10-NEXT: s_cmp_eq_u32 s10, 0 ; GFX10-NEXT: s_cselect_b32 s16, 1, 0 -; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], s12 -; GFX10-NEXT: s_lshl_b64 s[10:11], s[6:7], s10 -; GFX10-NEXT: s_lshr_b64 s[12:13], s[6:7], s12 -; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] +; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], s10 +; GFX10-NEXT: s_lshl_b64 s[12:13], s[6:7], s11 +; GFX10-NEXT: s_lshr_b64 s[10:11], s[6:7], s10 +; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13] ; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 ; GFX10-NEXT: s_cmp_lg_u32 s15, 0 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7] ; GFX10-NEXT: s_cmp_lg_u32 s16, 0 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX10-NEXT: s_cmp_lg_u32 s15, 0 -; GFX10-NEXT: s_cselect_b64 s[4:5], s[12:13], 0 +; GFX10-NEXT: s_cselect_b64 s[4:5], s[10:11], 0 ; GFX10-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: s_fshr_i128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_movk_i32 s10, 0x7f -; GFX11-NEXT: s_mov_b32 s11, 0 +; GFX11-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f +; GFX11-NEXT: s_and_not1_b64 s[8:9], 0x7f, s[8:9] ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX11-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] -; GFX11-NEXT: s_and_not1_b64 s[8:9], s[10:11], s[8:9] -; GFX11-NEXT: s_lshr_b32 s10, s1, 31 +; GFX11-NEXT: s_lshr_b32 s12, s1, 31 +; GFX11-NEXT: s_mov_b32 s13, 0 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 -; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[10:11] -; GFX11-NEXT: s_sub_i32 s13, s8, 64 +; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[12:13] +; GFX11-NEXT: s_sub_i32 s11, s8, 64 ; GFX11-NEXT: s_sub_i32 s9, 64, s8 ; GFX11-NEXT: s_cmp_lt_u32 s8, 64 ; GFX11-NEXT: s_cselect_b32 s16, 1, 0 ; GFX11-NEXT: s_cmp_eq_u32 s8, 0 ; GFX11-NEXT: s_cselect_b32 s17, 1, 0 -; GFX11-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 +; GFX11-NEXT: s_lshr_b64 s[12:13], s[0:1], s9 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], s8 ; GFX11-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 -; GFX11-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15] -; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s13 +; GFX11-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] +; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s11 ; GFX11-NEXT: s_cmp_lg_u32 s16, 0 ; GFX11-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 -; GFX11-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] +; GFX11-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1] ; GFX11-NEXT: s_cmp_lg_u32 s17, 0 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] -; GFX11-NEXT: s_sub_i32 s14, s12, 64 -; GFX11-NEXT: s_sub_i32 s10, 64, s12 -; GFX11-NEXT: s_cmp_lt_u32 s12, 64 +; GFX11-NEXT: s_sub_i32 s14, s10, 64 +; GFX11-NEXT: s_sub_i32 s11, 64, s10 +; GFX11-NEXT: s_cmp_lt_u32 s10, 64 ; GFX11-NEXT: s_cselect_b32 s15, 1, 0 -; GFX11-NEXT: s_cmp_eq_u32 s12, 0 +; GFX11-NEXT: s_cmp_eq_u32 s10, 0 ; GFX11-NEXT: s_cselect_b32 s16, 1, 0 -; GFX11-NEXT: s_lshr_b64 s[0:1], s[4:5], s12 -; GFX11-NEXT: s_lshl_b64 s[10:11], s[6:7], s10 -; GFX11-NEXT: s_lshr_b64 s[12:13], s[6:7], s12 -; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] +; GFX11-NEXT: s_lshr_b64 s[0:1], s[4:5], s10 +; GFX11-NEXT: s_lshl_b64 s[12:13], s[6:7], s11 +; GFX11-NEXT: 
s_lshr_b64 s[10:11], s[6:7], s10 +; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13] ; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 ; GFX11-NEXT: s_cmp_lg_u32 s15, 0 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7] ; GFX11-NEXT: s_cmp_lg_u32 s16, 0 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX11-NEXT: s_cmp_lg_u32 s15, 0 -; GFX11-NEXT: s_cselect_b64 s[4:5], s[12:13], 0 +; GFX11-NEXT: s_cselect_b64 s[4:5], s[10:11], 0 ; GFX11-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] ; GFX11-NEXT: ; return to shader part epilog @@ -6626,45 +6621,44 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) { ; GFX6-LABEL: v_fshr_i128_svs: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s6, 0x7f -; GFX6-NEXT: s_mov_b32 s7, 0 -; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX6-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] +; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], 1 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX6-NEXT: s_lshr_b32 s6, s1, 31 -; GFX6-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 -; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] -; GFX6-NEXT: s_sub_i32 s9, s4, 64 +; GFX6-NEXT: s_lshr_b32 s0, s1, 31 +; GFX6-NEXT: s_mov_b32 s1, 0 +; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GFX6-NEXT: s_sub_i32 s7, s4, 64 ; GFX6-NEXT: s_sub_i32 s5, 64, s4 ; GFX6-NEXT: s_cmp_lt_u32 s4, 64 ; GFX6-NEXT: s_cselect_b32 s12, 1, 0 ; GFX6-NEXT: s_cmp_eq_u32 s4, 0 ; GFX6-NEXT: s_cselect_b32 s13, 1, 0 -; GFX6-NEXT: s_lshl_b64 s[2:3], s[10:11], s4 -; GFX6-NEXT: s_lshr_b64 s[6:7], s[10:11], s5 +; GFX6-NEXT: s_lshl_b64 s[2:3], s[8:9], s4 +; GFX6-NEXT: s_lshr_b64 s[10:11], s[8:9], s5 ; GFX6-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 -; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] -; GFX6-NEXT: s_lshl_b64 s[6:7], s[10:11], s9 +; GFX6-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] +; GFX6-NEXT: s_lshl_b64 s[8:9], s[8:9], s7 ; GFX6-NEXT: s_cmp_lg_u32 s12, 0 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 -; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9] ; GFX6-NEXT: s_cmp_lg_u32 s13, 0 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[0:1], s[4:5] -; GFX6-NEXT: s_sub_i32 s0, s8, 64 -; GFX6-NEXT: s_sub_i32 s1, 64, s8 -; GFX6-NEXT: s_cmp_lt_u32 s8, 64 -; GFX6-NEXT: s_cselect_b32 s6, 1, 0 -; GFX6-NEXT: s_cmp_eq_u32 s8, 0 -; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s8 -; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s1 +; GFX6-NEXT: s_sub_i32 s0, s6, 64 +; GFX6-NEXT: s_sub_i32 s1, 64, s6 +; GFX6-NEXT: s_cmp_lt_u32 s6, 64 ; GFX6-NEXT: s_cselect_b32 s7, 1, 0 -; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s8 +; GFX6-NEXT: s_cmp_eq_u32 s6, 0 +; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s6 +; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s1 +; GFX6-NEXT: s_cselect_b32 s8, 1, 0 +; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s6 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s0 -; GFX6-NEXT: s_and_b32 s0, 1, s6 +; GFX6-NEXT: s_and_b32 s0, 1, s7 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX6-NEXT: s_and_b32 s0, 1, s7 +; GFX6-NEXT: s_and_b32 s0, 1, s8 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc ; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 @@ -6680,45 +6674,44 @@ define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; ; GFX8-LABEL: v_fshr_i128_svs: ; GFX8: ; %bb.0: -; GFX8-NEXT: 
s_movk_i32 s6, 0x7f -; GFX8-NEXT: s_mov_b32 s7, 0 -; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX8-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] +; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], 1 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX8-NEXT: s_lshr_b32 s6, s1, 31 -; GFX8-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 -; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] -; GFX8-NEXT: s_sub_i32 s9, s4, 64 +; GFX8-NEXT: s_lshr_b32 s0, s1, 31 +; GFX8-NEXT: s_mov_b32 s1, 0 +; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GFX8-NEXT: s_sub_i32 s7, s4, 64 ; GFX8-NEXT: s_sub_i32 s5, 64, s4 ; GFX8-NEXT: s_cmp_lt_u32 s4, 64 ; GFX8-NEXT: s_cselect_b32 s12, 1, 0 ; GFX8-NEXT: s_cmp_eq_u32 s4, 0 ; GFX8-NEXT: s_cselect_b32 s13, 1, 0 -; GFX8-NEXT: s_lshl_b64 s[2:3], s[10:11], s4 -; GFX8-NEXT: s_lshr_b64 s[6:7], s[10:11], s5 +; GFX8-NEXT: s_lshl_b64 s[2:3], s[8:9], s4 +; GFX8-NEXT: s_lshr_b64 s[10:11], s[8:9], s5 ; GFX8-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 -; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] -; GFX8-NEXT: s_lshl_b64 s[6:7], s[10:11], s9 +; GFX8-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] +; GFX8-NEXT: s_lshl_b64 s[8:9], s[8:9], s7 ; GFX8-NEXT: s_cmp_lg_u32 s12, 0 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 -; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9] ; GFX8-NEXT: s_cmp_lg_u32 s13, 0 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[0:1], s[4:5] -; GFX8-NEXT: s_sub_i32 s0, s8, 64 -; GFX8-NEXT: s_sub_i32 s1, 64, s8 -; GFX8-NEXT: s_cmp_lt_u32 s8, 64 -; GFX8-NEXT: s_cselect_b32 s6, 1, 0 -; GFX8-NEXT: s_cmp_eq_u32 s8, 0 -; GFX8-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] -; GFX8-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3] +; GFX8-NEXT: s_sub_i32 s0, s6, 64 +; GFX8-NEXT: s_sub_i32 s1, 64, s6 +; GFX8-NEXT: s_cmp_lt_u32 s6, 64 ; GFX8-NEXT: s_cselect_b32 s7, 1, 0 -; GFX8-NEXT: v_lshrrev_b64 v[8:9], s8, v[2:3] +; GFX8-NEXT: s_cmp_eq_u32 s6, 0 +; GFX8-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] +; GFX8-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3] +; GFX8-NEXT: s_cselect_b32 s8, 1, 0 +; GFX8-NEXT: v_lshrrev_b64 v[8:9], s6, v[2:3] ; GFX8-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3] -; GFX8-NEXT: s_and_b32 s0, 1, s6 +; GFX8-NEXT: s_and_b32 s0, 1, s7 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX8-NEXT: s_and_b32 s0, 1, s7 +; GFX8-NEXT: s_and_b32 s0, 1, s8 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 @@ -6734,45 +6727,44 @@ define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; ; GFX9-LABEL: v_fshr_i128_svs: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_movk_i32 s6, 0x7f -; GFX9-NEXT: s_mov_b32 s7, 0 -; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX9-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] +; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], 1 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX9-NEXT: s_lshr_b32 s6, s1, 31 -; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 -; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] -; GFX9-NEXT: s_sub_i32 s9, s4, 64 +; GFX9-NEXT: s_lshr_b32 s0, s1, 31 +; GFX9-NEXT: s_mov_b32 s1, 0 +; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GFX9-NEXT: s_sub_i32 s7, s4, 64 ; GFX9-NEXT: s_sub_i32 s5, 64, s4 ; GFX9-NEXT: s_cmp_lt_u32 s4, 64 ; GFX9-NEXT: s_cselect_b32 s12, 1, 0 ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 ; GFX9-NEXT: s_cselect_b32 s13, 1, 0 -; 
GFX9-NEXT: s_lshl_b64 s[2:3], s[10:11], s4 -; GFX9-NEXT: s_lshr_b64 s[6:7], s[10:11], s5 +; GFX9-NEXT: s_lshl_b64 s[2:3], s[8:9], s4 +; GFX9-NEXT: s_lshr_b64 s[10:11], s[8:9], s5 ; GFX9-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 -; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] -; GFX9-NEXT: s_lshl_b64 s[6:7], s[10:11], s9 +; GFX9-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] +; GFX9-NEXT: s_lshl_b64 s[8:9], s[8:9], s7 ; GFX9-NEXT: s_cmp_lg_u32 s12, 0 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 -; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9] ; GFX9-NEXT: s_cmp_lg_u32 s13, 0 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[0:1], s[4:5] -; GFX9-NEXT: s_sub_i32 s0, s8, 64 -; GFX9-NEXT: s_sub_i32 s1, 64, s8 -; GFX9-NEXT: s_cmp_lt_u32 s8, 64 -; GFX9-NEXT: s_cselect_b32 s6, 1, 0 -; GFX9-NEXT: s_cmp_eq_u32 s8, 0 -; GFX9-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] -; GFX9-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3] +; GFX9-NEXT: s_sub_i32 s0, s6, 64 +; GFX9-NEXT: s_sub_i32 s1, 64, s6 +; GFX9-NEXT: s_cmp_lt_u32 s6, 64 ; GFX9-NEXT: s_cselect_b32 s7, 1, 0 -; GFX9-NEXT: v_lshrrev_b64 v[8:9], s8, v[2:3] +; GFX9-NEXT: s_cmp_eq_u32 s6, 0 +; GFX9-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] +; GFX9-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3] +; GFX9-NEXT: s_cselect_b32 s8, 1, 0 +; GFX9-NEXT: v_lshrrev_b64 v[8:9], s6, v[2:3] ; GFX9-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3] -; GFX9-NEXT: s_and_b32 s0, 1, s6 +; GFX9-NEXT: s_and_b32 s0, 1, s7 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX9-NEXT: s_and_b32 s0, 1, s7 +; GFX9-NEXT: s_and_b32 s0, 1, s8 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 @@ -6788,45 +6780,44 @@ define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; ; GFX10-LABEL: v_fshr_i128_svs: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_movk_i32 s6, 0x7f -; GFX10-NEXT: s_mov_b32 s7, 0 +; GFX10-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX10-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] -; GFX10-NEXT: s_lshr_b32 s6, s1, 31 +; GFX10-NEXT: s_lshr_b32 s8, s1, 31 +; GFX10-NEXT: s_mov_b32 s9, 0 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 -; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] -; GFX10-NEXT: s_sub_i32 s9, s4, 64 +; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] +; GFX10-NEXT: s_sub_i32 s7, s4, 64 ; GFX10-NEXT: s_sub_i32 s5, 64, s4 ; GFX10-NEXT: s_cmp_lt_u32 s4, 64 -; GFX10-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] +; GFX10-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] ; GFX10-NEXT: s_cselect_b32 s12, 1, 0 ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 ; GFX10-NEXT: s_cselect_b32 s13, 1, 0 -; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s5 +; GFX10-NEXT: s_lshr_b64 s[8:9], s[0:1], s5 ; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], s4 ; GFX10-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 -; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] -; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 +; GFX10-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s7 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], 0 -; GFX10-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] +; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] ; GFX10-NEXT: s_cmp_lg_u32 s13, 0 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] -; GFX10-NEXT: s_sub_i32 s0, 64, s8 +; GFX10-NEXT: s_sub_i32 s0, 64, s6 ; GFX10-NEXT: v_lshlrev_b64 
v[6:7], s0, v[2:3] -; GFX10-NEXT: s_sub_i32 s0, s8, 64 -; GFX10-NEXT: s_cmp_lt_u32 s8, 64 +; GFX10-NEXT: s_sub_i32 s0, s6, 64 +; GFX10-NEXT: s_cmp_lt_u32 s6, 64 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] ; GFX10-NEXT: s_cselect_b32 s1, 1, 0 -; GFX10-NEXT: s_cmp_eq_u32 s8, 0 +; GFX10-NEXT: s_cmp_eq_u32 s6, 0 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 -; GFX10-NEXT: s_cselect_b32 s6, 1, 0 +; GFX10-NEXT: s_cselect_b32 s7, 1, 0 ; GFX10-NEXT: s_and_b32 s0, 1, s1 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 -; GFX10-NEXT: s_and_b32 s0, 1, s6 -; GFX10-NEXT: v_lshrrev_b64 v[2:3], s8, v[2:3] +; GFX10-NEXT: s_and_b32 s0, 1, s7 +; GFX10-NEXT: v_lshrrev_b64 v[2:3], s6, v[2:3] ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo @@ -6842,46 +6833,45 @@ define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; ; GFX11-LABEL: v_fshr_i128_svs: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_movk_i32 s6, 0x7f -; GFX11-NEXT: s_mov_b32 s7, 0 +; GFX11-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX11-NEXT: s_and_not1_b64 s[4:5], 0x7f, s[4:5] ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX11-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX11-NEXT: s_and_not1_b64 s[4:5], s[6:7], s[4:5] -; GFX11-NEXT: s_lshr_b32 s6, s1, 31 +; GFX11-NEXT: s_lshr_b32 s8, s1, 31 +; GFX11-NEXT: s_mov_b32 s9, 0 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 -; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] -; GFX11-NEXT: s_sub_i32 s9, s4, 64 +; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] +; GFX11-NEXT: s_sub_i32 s7, s4, 64 ; GFX11-NEXT: s_sub_i32 s5, 64, s4 ; GFX11-NEXT: s_cmp_lt_u32 s4, 64 -; GFX11-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] +; GFX11-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] ; GFX11-NEXT: s_cselect_b32 s12, 1, 0 ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 ; GFX11-NEXT: s_cselect_b32 s13, 1, 0 -; GFX11-NEXT: s_lshr_b64 s[6:7], s[0:1], s5 +; GFX11-NEXT: s_lshr_b64 s[8:9], s[0:1], s5 ; GFX11-NEXT: s_lshl_b64 s[10:11], s[2:3], s4 ; GFX11-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 -; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] -; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 +; GFX11-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] +; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s7 ; GFX11-NEXT: s_cmp_lg_u32 s12, 0 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[4:5], 0 -; GFX11-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] +; GFX11-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] ; GFX11-NEXT: s_cmp_lg_u32 s13, 0 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] -; GFX11-NEXT: s_sub_i32 s0, 64, s8 +; GFX11-NEXT: s_sub_i32 s0, 64, s6 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3] -; GFX11-NEXT: s_sub_i32 s0, s8, 64 -; GFX11-NEXT: s_cmp_lt_u32 s8, 64 +; GFX11-NEXT: s_sub_i32 s0, s6, 64 +; GFX11-NEXT: s_cmp_lt_u32 s6, 64 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] ; GFX11-NEXT: s_cselect_b32 s1, 1, 0 -; GFX11-NEXT: s_cmp_eq_u32 s8, 0 +; GFX11-NEXT: s_cmp_eq_u32 s6, 0 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v6 -; GFX11-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-NEXT: s_cselect_b32 s7, 1, 0 ; GFX11-NEXT: s_and_b32 s0, 1, s1 ; GFX11-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 -; GFX11-NEXT: s_and_b32 s0, 1, s6 -; GFX11-NEXT: v_lshrrev_b64 v[2:3], s8, v[2:3] +; GFX11-NEXT: s_and_b32 s0, 1, s7 +; GFX11-NEXT: v_lshrrev_b64 v[2:3], s6, v[2:3] ; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 ; GFX11-NEXT: v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5 ; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -6904,42 +6894,41 @@ define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) { ; GFX6-LABEL: v_fshr_i128_vss: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_mov_b64 s[6:7], 0x7f -; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX6-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 ; GFX6-NEXT: s_sub_i32 s5, s4, 64 -; GFX6-NEXT: s_sub_i32 s6, 64, s4 +; GFX6-NEXT: s_sub_i32 s7, 64, s4 ; GFX6-NEXT: v_lshl_b64 v[4:5], v[0:1], 1 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1 ; GFX6-NEXT: s_cmp_lt_u32 s4, 64 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v0 -; GFX6-NEXT: s_cselect_b32 s7, 1, 0 +; GFX6-NEXT: s_cselect_b32 s8, 1, 0 ; GFX6-NEXT: s_cmp_eq_u32 s4, 0 ; GFX6-NEXT: s_cselect_b32 s9, 1, 0 -; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], s6 +; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], s7 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s4 ; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], s4 -; GFX6-NEXT: s_and_b32 s4, 1, s7 +; GFX6-NEXT: s_and_b32 s4, 1, s8 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX6-NEXT: s_and_b32 s4, 1, s9 -; GFX6-NEXT: s_sub_i32 s10, s8, 64 -; GFX6-NEXT: s_sub_i32 s9, 64, s8 +; GFX6-NEXT: s_sub_i32 s10, s6, 64 +; GFX6-NEXT: s_sub_i32 s8, 64, s6 ; GFX6-NEXT: v_or_b32_e32 v6, v0, v6 ; GFX6-NEXT: v_or_b32_e32 v7, v1, v7 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[4:5], s5 -; GFX6-NEXT: s_cmp_lt_u32 s8, 64 +; GFX6-NEXT: s_cmp_lt_u32 s6, 64 ; GFX6-NEXT: s_cselect_b32 s11, 1, 0 -; GFX6-NEXT: s_cmp_eq_u32 s8, 0 +; GFX6-NEXT: s_cmp_eq_u32 s6, 0 ; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX6-NEXT: s_cselect_b32 s12, 1, 0 -; GFX6-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 -; GFX6-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 -; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 +; GFX6-NEXT: s_lshr_b64 s[4:5], s[2:3], s6 +; GFX6-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 +; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 ; GFX6-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] ; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 ; GFX6-NEXT: s_cmp_lg_u32 s11, 0 @@ -6958,42 +6947,41 @@ define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; ; GFX8-LABEL: v_fshr_i128_vss: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_mov_b64 s[6:7], 0x7f -; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX8-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] ; GFX8-NEXT: s_sub_i32 s5, s4, 64 -; GFX8-NEXT: s_sub_i32 s6, 64, s4 +; GFX8-NEXT: s_sub_i32 s7, 64, s4 ; GFX8-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1] ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1 ; GFX8-NEXT: s_cmp_lt_u32 s4, 64 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v0 -; GFX8-NEXT: s_cselect_b32 s7, 1, 0 +; GFX8-NEXT: s_cselect_b32 s8, 1, 0 ; GFX8-NEXT: s_cmp_eq_u32 s4, 0 ; GFX8-NEXT: s_cselect_b32 s9, 1, 0 -; GFX8-NEXT: v_lshrrev_b64 v[0:1], s6, v[4:5] +; GFX8-NEXT: v_lshrrev_b64 v[0:1], s7, v[4:5] ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] ; GFX8-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5] -; GFX8-NEXT: s_and_b32 s4, 1, s7 +; GFX8-NEXT: s_and_b32 s4, 1, s8 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX8-NEXT: s_and_b32 s4, 1, s9 -; GFX8-NEXT: s_sub_i32 s10, 
s8, 64 -; GFX8-NEXT: s_sub_i32 s9, 64, s8 +; GFX8-NEXT: s_sub_i32 s10, s6, 64 +; GFX8-NEXT: s_sub_i32 s8, 64, s6 ; GFX8-NEXT: v_or_b32_e32 v6, v0, v6 ; GFX8-NEXT: v_or_b32_e32 v7, v1, v7 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], s5, v[4:5] -; GFX8-NEXT: s_cmp_lt_u32 s8, 64 +; GFX8-NEXT: s_cmp_lt_u32 s6, 64 ; GFX8-NEXT: s_cselect_b32 s11, 1, 0 -; GFX8-NEXT: s_cmp_eq_u32 s8, 0 +; GFX8-NEXT: s_cmp_eq_u32 s6, 0 ; GFX8-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX8-NEXT: s_cselect_b32 s12, 1, 0 -; GFX8-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 -; GFX8-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 -; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 +; GFX8-NEXT: s_lshr_b64 s[4:5], s[2:3], s6 +; GFX8-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 +; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 ; GFX8-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] ; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 ; GFX8-NEXT: s_cmp_lg_u32 s11, 0 @@ -7012,42 +7000,41 @@ define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; ; GFX9-LABEL: v_fshr_i128_vss: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f -; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX9-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] ; GFX9-NEXT: s_sub_i32 s5, s4, 64 -; GFX9-NEXT: s_sub_i32 s6, 64, s4 +; GFX9-NEXT: s_sub_i32 s7, 64, s4 ; GFX9-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1] ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1 ; GFX9-NEXT: s_cmp_lt_u32 s4, 64 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v0 -; GFX9-NEXT: s_cselect_b32 s7, 1, 0 +; GFX9-NEXT: s_cselect_b32 s8, 1, 0 ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 ; GFX9-NEXT: s_cselect_b32 s9, 1, 0 -; GFX9-NEXT: v_lshrrev_b64 v[0:1], s6, v[4:5] +; GFX9-NEXT: v_lshrrev_b64 v[0:1], s7, v[4:5] ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] ; GFX9-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5] -; GFX9-NEXT: s_and_b32 s4, 1, s7 +; GFX9-NEXT: s_and_b32 s4, 1, s8 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX9-NEXT: s_and_b32 s4, 1, s9 -; GFX9-NEXT: s_sub_i32 s10, s8, 64 -; GFX9-NEXT: s_sub_i32 s9, 64, s8 +; GFX9-NEXT: s_sub_i32 s10, s6, 64 +; GFX9-NEXT: s_sub_i32 s8, 64, s6 ; GFX9-NEXT: v_or_b32_e32 v6, v0, v6 ; GFX9-NEXT: v_or_b32_e32 v7, v1, v7 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], s5, v[4:5] -; GFX9-NEXT: s_cmp_lt_u32 s8, 64 +; GFX9-NEXT: s_cmp_lt_u32 s6, 64 ; GFX9-NEXT: s_cselect_b32 s11, 1, 0 -; GFX9-NEXT: s_cmp_eq_u32 s8, 0 +; GFX9-NEXT: s_cmp_eq_u32 s6, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX9-NEXT: s_cselect_b32 s12, 1, 0 -; GFX9-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 -; GFX9-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 -; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 +; GFX9-NEXT: s_lshr_b64 s[4:5], s[2:3], s6 +; GFX9-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 +; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 ; GFX9-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] ; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 ; GFX9-NEXT: s_cmp_lg_u32 s11, 0 @@ -7068,41 +7055,40 @@ define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; GFX10: ; %bb.0: ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v1 -; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f ; 
GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] -; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX10-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX10-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5] +; GFX10-NEXT: s_sub_i32 s7, 64, s4 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX10-NEXT: s_sub_i32 s6, 64, s4 ; GFX10-NEXT: s_sub_i32 s5, s4, 64 ; GFX10-NEXT: s_cmp_lt_u32 s4, 64 -; GFX10-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] +; GFX10-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1] +; GFX10-NEXT: s_cselect_b32 s8, 1, 0 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] -; GFX10-NEXT: s_cselect_b32 s7, 1, 0 ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], s4, v[0:1] ; GFX10-NEXT: s_cselect_b32 s9, 1, 0 -; GFX10-NEXT: s_and_b32 s4, 1, s7 +; GFX10-NEXT: s_and_b32 s4, 1, s8 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX10-NEXT: s_and_b32 s4, 1, s9 -; GFX10-NEXT: s_sub_i32 s10, s8, 64 -; GFX10-NEXT: s_sub_i32 s6, 64, s8 -; GFX10-NEXT: s_cmp_lt_u32 s8, 64 +; GFX10-NEXT: s_sub_i32 s10, s6, 64 +; GFX10-NEXT: s_sub_i32 s7, 64, s6 +; GFX10-NEXT: s_cmp_lt_u32 s6, 64 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo ; GFX10-NEXT: s_cselect_b32 s11, 1, 0 -; GFX10-NEXT: s_cmp_eq_u32 s8, 0 +; GFX10-NEXT: s_cmp_eq_u32 s6, 0 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0 -; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s8 -; GFX10-NEXT: s_lshl_b64 s[6:7], s[2:3], s6 -; GFX10-NEXT: s_lshr_b64 s[8:9], s[2:3], s8 -; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] +; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s6 +; GFX10-NEXT: s_lshl_b64 s[8:9], s[2:3], s7 +; GFX10-NEXT: s_lshr_b64 s[6:7], s[2:3], s6 +; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 ; GFX10-NEXT: s_cmp_lg_u32 s11, 0 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo @@ -7112,7 +7098,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] ; GFX10-NEXT: s_cmp_lg_u32 s11, 0 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v6 -; GFX10-NEXT: s_cselect_b64 s[2:3], s[8:9], 0 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[6:7], 0 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v7 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 @@ -7122,39 +7108,39 @@ define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; GFX11: ; %bb.0: ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 31, v1 -; GFX11-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] -; GFX11-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] -; GFX11-NEXT: s_and_not1_b64 s[4:5], s[6:7], s[4:5] +; GFX11-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f +; GFX11-NEXT: s_and_not1_b64 s[4:5], 0x7f, s[4:5] +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_sub_i32 s7, 64, s4 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX11-NEXT: s_sub_i32 s6, 64, s4 ; GFX11-NEXT: s_sub_i32 s5, s4, 64 ; GFX11-NEXT: s_cmp_lt_u32 s4, 64 -; GFX11-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] +; GFX11-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1] +; GFX11-NEXT: s_cselect_b32 s8, 1, 0 ; GFX11-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] -; GFX11-NEXT: s_cselect_b32 s7, 1, 0 ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 ; GFX11-NEXT: 
v_lshlrev_b64 v[8:9], s4, v[0:1] ; GFX11-NEXT: s_cselect_b32 s9, 1, 0 -; GFX11-NEXT: s_and_b32 s4, 1, s7 +; GFX11-NEXT: s_and_b32 s4, 1, s8 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX11-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX11-NEXT: s_and_b32 s4, 1, s9 -; GFX11-NEXT: s_sub_i32 s10, s8, 64 -; GFX11-NEXT: s_sub_i32 s6, 64, s8 -; GFX11-NEXT: s_cmp_lt_u32 s8, 64 +; GFX11-NEXT: s_sub_i32 s10, s6, 64 +; GFX11-NEXT: s_sub_i32 s7, 64, s6 +; GFX11-NEXT: s_cmp_lt_u32 s6, 64 ; GFX11-NEXT: v_dual_cndmask_b32 v6, 0, v8 :: v_dual_cndmask_b32 v7, 0, v9 ; GFX11-NEXT: s_cselect_b32 s11, 1, 0 -; GFX11-NEXT: s_cmp_eq_u32 s8, 0 +; GFX11-NEXT: s_cmp_eq_u32 s6, 0 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX11-NEXT: s_cselect_b32 s12, 1, 0 -; GFX11-NEXT: s_lshr_b64 s[4:5], s[0:1], s8 -; GFX11-NEXT: s_lshl_b64 s[6:7], s[2:3], s6 -; GFX11-NEXT: s_lshr_b64 s[8:9], s[2:3], s8 -; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] +; GFX11-NEXT: s_lshr_b64 s[4:5], s[0:1], s6 +; GFX11-NEXT: s_lshl_b64 s[8:9], s[2:3], s7 +; GFX11-NEXT: s_lshr_b64 s[6:7], s[2:3], s6 +; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 ; GFX11-NEXT: s_cmp_lg_u32 s11, 0 ; GFX11-NEXT: v_dual_cndmask_b32 v2, v0, v2 :: v_dual_cndmask_b32 v3, v1, v3 @@ -7163,7 +7149,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] ; GFX11-NEXT: s_cmp_lg_u32 s11, 0 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v6 -; GFX11-NEXT: s_cselect_b64 s[2:3], s[8:9], 0 +; GFX11-NEXT: s_cselect_b64 s[2:3], s[6:7], 0 ; GFX11-NEXT: v_or_b32_e32 v1, s1, v7 ; GFX11-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX11-NEXT: v_or_b32_e32 v3, s3, v3 @@ -7301,56 +7287,54 @@ define i128 @v_fshr_i128_65(i128 %lhs, i128 %rhs) { define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) { ; GFX6-LABEL: s_fshr_v2i128: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s18, 0x7f -; GFX6-NEXT: s_mov_b32 s19, 0 -; GFX6-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] -; GFX6-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] -; GFX6-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 +; GFX6-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f +; GFX6-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17] ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX6-NEXT: s_lshr_b32 s0, s1, 31 -; GFX6-NEXT: s_mov_b32 s1, s19 -; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] -; GFX6-NEXT: s_sub_i32 s23, s16, 64 +; GFX6-NEXT: s_lshr_b32 s24, s1, 31 +; GFX6-NEXT: s_mov_b32 s25, 0 +; GFX6-NEXT: s_lshl_b64 s[22:23], s[0:1], 1 +; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[24:25] +; GFX6-NEXT: s_sub_i32 s19, s16, 64 ; GFX6-NEXT: s_sub_i32 s17, 64, s16 ; GFX6-NEXT: s_cmp_lt_u32 s16, 64 -; GFX6-NEXT: s_cselect_b32 s28, 1, 0 +; GFX6-NEXT: s_cselect_b32 s24, 1, 0 ; GFX6-NEXT: s_cmp_eq_u32 s16, 0 -; GFX6-NEXT: s_cselect_b32 s29, 1, 0 -; GFX6-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 -; GFX6-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 +; GFX6-NEXT: s_cselect_b32 s28, 1, 0 +; GFX6-NEXT: s_lshl_b64 s[2:3], s[22:23], s16 +; GFX6-NEXT: s_lshr_b64 s[26:27], s[22:23], s17 ; GFX6-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 ; GFX6-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] -; GFX6-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 -; GFX6-NEXT: s_cmp_lg_u32 s28, 0 +; GFX6-NEXT: s_lshl_b64 s[22:23], s[22:23], s19 +; GFX6-NEXT: s_cmp_lg_u32 s24, 0 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 
-; GFX6-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] -; GFX6-NEXT: s_cmp_lg_u32 s29, 0 +; GFX6-NEXT: s_cselect_b64 s[16:17], s[16:17], s[22:23] +; GFX6-NEXT: s_cmp_lg_u32 s28, 0 ; GFX6-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] -; GFX6-NEXT: s_sub_i32 s26, s22, 64 -; GFX6-NEXT: s_sub_i32 s24, 64, s22 -; GFX6-NEXT: s_cmp_lt_u32 s22, 64 +; GFX6-NEXT: s_sub_i32 s24, s18, 64 +; GFX6-NEXT: s_sub_i32 s22, 64, s18 +; GFX6-NEXT: s_cmp_lt_u32 s18, 64 +; GFX6-NEXT: s_cselect_b32 s26, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s18, 0 ; GFX6-NEXT: s_cselect_b32 s27, 1, 0 -; GFX6-NEXT: s_cmp_eq_u32 s22, 0 -; GFX6-NEXT: s_cselect_b32 s28, 1, 0 -; GFX6-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 -; GFX6-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 -; GFX6-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 -; GFX6-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] -; GFX6-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 +; GFX6-NEXT: s_lshr_b64 s[0:1], s[10:11], s18 +; GFX6-NEXT: s_lshr_b64 s[18:19], s[8:9], s18 +; GFX6-NEXT: s_lshl_b64 s[22:23], s[10:11], s22 +; GFX6-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23] +; GFX6-NEXT: s_lshr_b64 s[10:11], s[10:11], s24 +; GFX6-NEXT: s_cmp_lg_u32 s26, 0 +; GFX6-NEXT: s_cselect_b64 s[10:11], s[18:19], s[10:11] ; GFX6-NEXT: s_cmp_lg_u32 s27, 0 -; GFX6-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] -; GFX6-NEXT: s_cmp_lg_u32 s28, 0 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] -; GFX6-NEXT: s_cmp_lg_u32 s27, 0 +; GFX6-NEXT: s_cmp_lg_u32 s26, 0 ; GFX6-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] ; GFX6-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] -; GFX6-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] -; GFX6-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] +; GFX6-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f +; GFX6-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21] ; GFX6-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 -; GFX6-NEXT: s_lshr_b32 s18, s5, 31 +; GFX6-NEXT: s_lshr_b32 s24, s5, 31 ; GFX6-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 -; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[18:19] +; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[24:25] ; GFX6-NEXT: s_sub_i32 s9, s10, 64 ; GFX6-NEXT: s_sub_i32 s11, 64, s10 ; GFX6-NEXT: s_cmp_lt_u32 s10, 64 @@ -7390,56 +7374,54 @@ define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX8-LABEL: s_fshr_v2i128: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_movk_i32 s18, 0x7f -; GFX8-NEXT: s_mov_b32 s19, 0 -; GFX8-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] -; GFX8-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] -; GFX8-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 +; GFX8-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f +; GFX8-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17] ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX8-NEXT: s_lshr_b32 s0, s1, 31 -; GFX8-NEXT: s_mov_b32 s1, s19 -; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] -; GFX8-NEXT: s_sub_i32 s23, s16, 64 +; GFX8-NEXT: s_lshr_b32 s24, s1, 31 +; GFX8-NEXT: s_mov_b32 s25, 0 +; GFX8-NEXT: s_lshl_b64 s[22:23], s[0:1], 1 +; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[24:25] +; GFX8-NEXT: s_sub_i32 s19, s16, 64 ; GFX8-NEXT: s_sub_i32 s17, 64, s16 ; GFX8-NEXT: s_cmp_lt_u32 s16, 64 -; GFX8-NEXT: s_cselect_b32 s28, 1, 0 +; GFX8-NEXT: s_cselect_b32 s24, 1, 0 ; GFX8-NEXT: s_cmp_eq_u32 s16, 0 -; GFX8-NEXT: s_cselect_b32 s29, 1, 0 -; GFX8-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 -; GFX8-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 +; GFX8-NEXT: s_cselect_b32 s28, 1, 0 +; GFX8-NEXT: s_lshl_b64 s[2:3], s[22:23], s16 +; GFX8-NEXT: s_lshr_b64 s[26:27], s[22:23], s17 ; GFX8-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 ; GFX8-NEXT: s_or_b64 s[16:17], 
s[26:27], s[16:17] -; GFX8-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 -; GFX8-NEXT: s_cmp_lg_u32 s28, 0 +; GFX8-NEXT: s_lshl_b64 s[22:23], s[22:23], s19 +; GFX8-NEXT: s_cmp_lg_u32 s24, 0 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 -; GFX8-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] -; GFX8-NEXT: s_cmp_lg_u32 s29, 0 +; GFX8-NEXT: s_cselect_b64 s[16:17], s[16:17], s[22:23] +; GFX8-NEXT: s_cmp_lg_u32 s28, 0 ; GFX8-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] -; GFX8-NEXT: s_sub_i32 s26, s22, 64 -; GFX8-NEXT: s_sub_i32 s24, 64, s22 -; GFX8-NEXT: s_cmp_lt_u32 s22, 64 +; GFX8-NEXT: s_sub_i32 s24, s18, 64 +; GFX8-NEXT: s_sub_i32 s22, 64, s18 +; GFX8-NEXT: s_cmp_lt_u32 s18, 64 +; GFX8-NEXT: s_cselect_b32 s26, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s18, 0 ; GFX8-NEXT: s_cselect_b32 s27, 1, 0 -; GFX8-NEXT: s_cmp_eq_u32 s22, 0 -; GFX8-NEXT: s_cselect_b32 s28, 1, 0 -; GFX8-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 -; GFX8-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 -; GFX8-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 -; GFX8-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] -; GFX8-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 +; GFX8-NEXT: s_lshr_b64 s[0:1], s[10:11], s18 +; GFX8-NEXT: s_lshr_b64 s[18:19], s[8:9], s18 +; GFX8-NEXT: s_lshl_b64 s[22:23], s[10:11], s22 +; GFX8-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23] +; GFX8-NEXT: s_lshr_b64 s[10:11], s[10:11], s24 +; GFX8-NEXT: s_cmp_lg_u32 s26, 0 +; GFX8-NEXT: s_cselect_b64 s[10:11], s[18:19], s[10:11] ; GFX8-NEXT: s_cmp_lg_u32 s27, 0 -; GFX8-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] -; GFX8-NEXT: s_cmp_lg_u32 s28, 0 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] -; GFX8-NEXT: s_cmp_lg_u32 s27, 0 +; GFX8-NEXT: s_cmp_lg_u32 s26, 0 ; GFX8-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] ; GFX8-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] -; GFX8-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] -; GFX8-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] +; GFX8-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f +; GFX8-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21] ; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 -; GFX8-NEXT: s_lshr_b32 s18, s5, 31 +; GFX8-NEXT: s_lshr_b32 s24, s5, 31 ; GFX8-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 -; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[18:19] +; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[24:25] ; GFX8-NEXT: s_sub_i32 s9, s10, 64 ; GFX8-NEXT: s_sub_i32 s11, 64, s10 ; GFX8-NEXT: s_cmp_lt_u32 s10, 64 @@ -7479,56 +7461,54 @@ define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX9-LABEL: s_fshr_v2i128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_movk_i32 s18, 0x7f -; GFX9-NEXT: s_mov_b32 s19, 0 -; GFX9-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] -; GFX9-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] -; GFX9-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 +; GFX9-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f +; GFX9-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17] ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX9-NEXT: s_lshr_b32 s0, s1, 31 -; GFX9-NEXT: s_mov_b32 s1, s19 -; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] -; GFX9-NEXT: s_sub_i32 s23, s16, 64 +; GFX9-NEXT: s_lshr_b32 s24, s1, 31 +; GFX9-NEXT: s_mov_b32 s25, 0 +; GFX9-NEXT: s_lshl_b64 s[22:23], s[0:1], 1 +; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[24:25] +; GFX9-NEXT: s_sub_i32 s19, s16, 64 ; GFX9-NEXT: s_sub_i32 s17, 64, s16 ; GFX9-NEXT: s_cmp_lt_u32 s16, 64 -; GFX9-NEXT: s_cselect_b32 s28, 1, 0 +; GFX9-NEXT: s_cselect_b32 s24, 1, 0 ; GFX9-NEXT: s_cmp_eq_u32 s16, 0 -; GFX9-NEXT: s_cselect_b32 s29, 1, 0 -; GFX9-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 -; GFX9-NEXT: s_lshr_b64 
s[26:27], s[24:25], s17 +; GFX9-NEXT: s_cselect_b32 s28, 1, 0 +; GFX9-NEXT: s_lshl_b64 s[2:3], s[22:23], s16 +; GFX9-NEXT: s_lshr_b64 s[26:27], s[22:23], s17 ; GFX9-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 ; GFX9-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] -; GFX9-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 -; GFX9-NEXT: s_cmp_lg_u32 s28, 0 +; GFX9-NEXT: s_lshl_b64 s[22:23], s[22:23], s19 +; GFX9-NEXT: s_cmp_lg_u32 s24, 0 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 -; GFX9-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] -; GFX9-NEXT: s_cmp_lg_u32 s29, 0 +; GFX9-NEXT: s_cselect_b64 s[16:17], s[16:17], s[22:23] +; GFX9-NEXT: s_cmp_lg_u32 s28, 0 ; GFX9-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] -; GFX9-NEXT: s_sub_i32 s26, s22, 64 -; GFX9-NEXT: s_sub_i32 s24, 64, s22 -; GFX9-NEXT: s_cmp_lt_u32 s22, 64 +; GFX9-NEXT: s_sub_i32 s24, s18, 64 +; GFX9-NEXT: s_sub_i32 s22, 64, s18 +; GFX9-NEXT: s_cmp_lt_u32 s18, 64 +; GFX9-NEXT: s_cselect_b32 s26, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s18, 0 ; GFX9-NEXT: s_cselect_b32 s27, 1, 0 -; GFX9-NEXT: s_cmp_eq_u32 s22, 0 -; GFX9-NEXT: s_cselect_b32 s28, 1, 0 -; GFX9-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 -; GFX9-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 -; GFX9-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 -; GFX9-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] -; GFX9-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 +; GFX9-NEXT: s_lshr_b64 s[0:1], s[10:11], s18 +; GFX9-NEXT: s_lshr_b64 s[18:19], s[8:9], s18 +; GFX9-NEXT: s_lshl_b64 s[22:23], s[10:11], s22 +; GFX9-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23] +; GFX9-NEXT: s_lshr_b64 s[10:11], s[10:11], s24 +; GFX9-NEXT: s_cmp_lg_u32 s26, 0 +; GFX9-NEXT: s_cselect_b64 s[10:11], s[18:19], s[10:11] ; GFX9-NEXT: s_cmp_lg_u32 s27, 0 -; GFX9-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] -; GFX9-NEXT: s_cmp_lg_u32 s28, 0 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] -; GFX9-NEXT: s_cmp_lg_u32 s27, 0 +; GFX9-NEXT: s_cmp_lg_u32 s26, 0 ; GFX9-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] ; GFX9-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] -; GFX9-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] -; GFX9-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] +; GFX9-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f +; GFX9-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21] ; GFX9-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 -; GFX9-NEXT: s_lshr_b32 s18, s5, 31 +; GFX9-NEXT: s_lshr_b32 s24, s5, 31 ; GFX9-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 -; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[18:19] +; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[24:25] ; GFX9-NEXT: s_sub_i32 s9, s10, 64 ; GFX9-NEXT: s_sub_i32 s11, 64, s10 ; GFX9-NEXT: s_cmp_lt_u32 s10, 64 @@ -7568,56 +7548,54 @@ define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX10-LABEL: s_fshr_v2i128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_movk_i32 s18, 0x7f -; GFX10-NEXT: s_mov_b32 s19, 0 +; GFX10-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f +; GFX10-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17] ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX10-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] -; GFX10-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] -; GFX10-NEXT: s_lshr_b32 s24, s1, 31 -; GFX10-NEXT: s_mov_b32 s25, s19 +; GFX10-NEXT: s_lshr_b32 s22, s1, 31 +; GFX10-NEXT: s_mov_b32 s23, 0 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 -; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[24:25] -; GFX10-NEXT: s_sub_i32 s23, s16, 64 +; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[22:23] +; GFX10-NEXT: s_sub_i32 s19, s16, 64 ; GFX10-NEXT: s_sub_i32 s17, 64, s16 ; GFX10-NEXT: s_cmp_lt_u32 s16, 64 -; GFX10-NEXT: 
s_cselect_b32 s28, 1, 0 +; GFX10-NEXT: s_cselect_b32 s22, 1, 0 ; GFX10-NEXT: s_cmp_eq_u32 s16, 0 -; GFX10-NEXT: s_cselect_b32 s29, 1, 0 +; GFX10-NEXT: s_cselect_b32 s28, 1, 0 ; GFX10-NEXT: s_lshr_b64 s[24:25], s[0:1], s17 ; GFX10-NEXT: s_lshl_b64 s[26:27], s[2:3], s16 ; GFX10-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 ; GFX10-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] -; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s23 -; GFX10-NEXT: s_cmp_lg_u32 s28, 0 +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s19 +; GFX10-NEXT: s_cmp_lg_u32 s22, 0 ; GFX10-NEXT: s_cselect_b64 s[16:17], s[16:17], 0 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1] -; GFX10-NEXT: s_cmp_lg_u32 s29, 0 +; GFX10-NEXT: s_cmp_lg_u32 s28, 0 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] -; GFX10-NEXT: s_sub_i32 s26, s22, 64 -; GFX10-NEXT: s_sub_i32 s23, 64, s22 -; GFX10-NEXT: s_cmp_lt_u32 s22, 64 +; GFX10-NEXT: s_sub_i32 s22, s18, 64 +; GFX10-NEXT: s_sub_i32 s19, 64, s18 +; GFX10-NEXT: s_cmp_lt_u32 s18, 64 +; GFX10-NEXT: s_cselect_b32 s26, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s18, 0 ; GFX10-NEXT: s_cselect_b32 s27, 1, 0 -; GFX10-NEXT: s_cmp_eq_u32 s22, 0 -; GFX10-NEXT: s_cselect_b32 s28, 1, 0 -; GFX10-NEXT: s_lshr_b64 s[0:1], s[8:9], s22 -; GFX10-NEXT: s_lshl_b64 s[24:25], s[10:11], s23 -; GFX10-NEXT: s_lshr_b64 s[22:23], s[10:11], s22 +; GFX10-NEXT: s_lshr_b64 s[0:1], s[8:9], s18 +; GFX10-NEXT: s_lshl_b64 s[24:25], s[10:11], s19 +; GFX10-NEXT: s_lshr_b64 s[18:19], s[10:11], s18 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[24:25] -; GFX10-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 -; GFX10-NEXT: s_cmp_lg_u32 s27, 0 +; GFX10-NEXT: s_lshr_b64 s[10:11], s[10:11], s22 +; GFX10-NEXT: s_cmp_lg_u32 s26, 0 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[10:11] -; GFX10-NEXT: s_cmp_lg_u32 s28, 0 -; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] ; GFX10-NEXT: s_cmp_lg_u32 s27, 0 -; GFX10-NEXT: s_cselect_b64 s[8:9], s[22:23], 0 -; GFX10-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] -; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] -; GFX10-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] +; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] +; GFX10-NEXT: s_cmp_lg_u32 s26, 0 +; GFX10-NEXT: s_cselect_b64 s[8:9], s[18:19], 0 +; GFX10-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21] ; GFX10-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 -; GFX10-NEXT: s_lshr_b32 s18, s5, 31 +; GFX10-NEXT: s_lshr_b32 s22, s5, 31 +; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] +; GFX10-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f ; GFX10-NEXT: s_or_b64 s[0:1], s[16:17], s[0:1] ; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], 1 -; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[18:19] +; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[22:23] ; GFX10-NEXT: s_sub_i32 s9, s10, 64 ; GFX10-NEXT: s_sub_i32 s11, 64, s10 ; GFX10-NEXT: s_cmp_lt_u32 s10, 64 @@ -7657,56 +7635,54 @@ define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX11-LABEL: s_fshr_v2i128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_movk_i32 s18, 0x7f -; GFX11-NEXT: s_mov_b32 s19, 0 +; GFX11-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f +; GFX11-NEXT: s_and_not1_b64 s[16:17], 0x7f, s[16:17] ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GFX11-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] -; GFX11-NEXT: s_and_not1_b64 s[16:17], s[18:19], s[16:17] -; GFX11-NEXT: s_lshr_b32 s24, s1, 31 -; GFX11-NEXT: s_mov_b32 s25, s19 +; GFX11-NEXT: s_lshr_b32 s22, s1, 31 +; GFX11-NEXT: s_mov_b32 s23, 0 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 -; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[24:25] -; GFX11-NEXT: s_sub_i32 s23, s16, 64 +; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], 
s[22:23] +; GFX11-NEXT: s_sub_i32 s19, s16, 64 ; GFX11-NEXT: s_sub_i32 s17, 64, s16 ; GFX11-NEXT: s_cmp_lt_u32 s16, 64 -; GFX11-NEXT: s_cselect_b32 s28, 1, 0 +; GFX11-NEXT: s_cselect_b32 s22, 1, 0 ; GFX11-NEXT: s_cmp_eq_u32 s16, 0 -; GFX11-NEXT: s_cselect_b32 s29, 1, 0 +; GFX11-NEXT: s_cselect_b32 s28, 1, 0 ; GFX11-NEXT: s_lshr_b64 s[24:25], s[0:1], s17 ; GFX11-NEXT: s_lshl_b64 s[26:27], s[2:3], s16 ; GFX11-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 ; GFX11-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] -; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s23 -; GFX11-NEXT: s_cmp_lg_u32 s28, 0 +; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s19 +; GFX11-NEXT: s_cmp_lg_u32 s22, 0 ; GFX11-NEXT: s_cselect_b64 s[16:17], s[16:17], 0 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1] -; GFX11-NEXT: s_cmp_lg_u32 s29, 0 +; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] -; GFX11-NEXT: s_sub_i32 s26, s22, 64 -; GFX11-NEXT: s_sub_i32 s23, 64, s22 -; GFX11-NEXT: s_cmp_lt_u32 s22, 64 +; GFX11-NEXT: s_sub_i32 s22, s18, 64 +; GFX11-NEXT: s_sub_i32 s19, 64, s18 +; GFX11-NEXT: s_cmp_lt_u32 s18, 64 +; GFX11-NEXT: s_cselect_b32 s26, 1, 0 +; GFX11-NEXT: s_cmp_eq_u32 s18, 0 ; GFX11-NEXT: s_cselect_b32 s27, 1, 0 -; GFX11-NEXT: s_cmp_eq_u32 s22, 0 -; GFX11-NEXT: s_cselect_b32 s28, 1, 0 -; GFX11-NEXT: s_lshr_b64 s[0:1], s[8:9], s22 -; GFX11-NEXT: s_lshl_b64 s[24:25], s[10:11], s23 -; GFX11-NEXT: s_lshr_b64 s[22:23], s[10:11], s22 +; GFX11-NEXT: s_lshr_b64 s[0:1], s[8:9], s18 +; GFX11-NEXT: s_lshl_b64 s[24:25], s[10:11], s19 +; GFX11-NEXT: s_lshr_b64 s[18:19], s[10:11], s18 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[24:25] -; GFX11-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 -; GFX11-NEXT: s_cmp_lg_u32 s27, 0 +; GFX11-NEXT: s_lshr_b64 s[10:11], s[10:11], s22 +; GFX11-NEXT: s_cmp_lg_u32 s26, 0 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[10:11] -; GFX11-NEXT: s_cmp_lg_u32 s28, 0 -; GFX11-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] ; GFX11-NEXT: s_cmp_lg_u32 s27, 0 -; GFX11-NEXT: s_cselect_b64 s[8:9], s[22:23], 0 -; GFX11-NEXT: s_and_not1_b64 s[10:11], s[18:19], s[20:21] -; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] -; GFX11-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] +; GFX11-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] +; GFX11-NEXT: s_cmp_lg_u32 s26, 0 +; GFX11-NEXT: s_cselect_b64 s[8:9], s[18:19], 0 +; GFX11-NEXT: s_and_not1_b64 s[10:11], 0x7f, s[20:21] ; GFX11-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 -; GFX11-NEXT: s_lshr_b32 s18, s5, 31 +; GFX11-NEXT: s_lshr_b32 s22, s5, 31 +; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] +; GFX11-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f ; GFX11-NEXT: s_or_b64 s[0:1], s[16:17], s[0:1] ; GFX11-NEXT: s_lshl_b64 s[4:5], s[4:5], 1 -; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[18:19] +; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[22:23] ; GFX11-NEXT: s_sub_i32 s9, s10, 64 ; GFX11-NEXT: s_sub_i32 s11, 64, s10 ; GFX11-NEXT: s_cmp_lt_u32 s10, 64 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll index d6957be8ab8ffb..64c3cd4e8c067f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll @@ -146,6 +146,7 @@ define amdgpu_ps void @insertelement_v_v2i16_s_s(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: s_lshl_b32 s1, s1, s0 ; GFX7-NEXT: s_lshl_b32 s0, 0xffff, s0 ; GFX7-NEXT: s_not_b32 s0, s0 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 @@ -515,6 +516,7 @@ 
define amdgpu_ps void @insertelement_v_v2i16_s_v(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: v_lshl_b32_e32 v2, s0, v1 ; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 ; GFX7-NEXT: v_not_b32_e32 v1, v1 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_and_b32_e32 v0, v0, v1 @@ -608,6 +610,7 @@ define amdgpu_ps void @insertelement_v_v2i16_v_s(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: v_lshlrev_b32_e32 v1, s0, v1 ; GFX7-NEXT: s_lshl_b32 s0, 0xffff, s0 ; GFX7-NEXT: s_not_b32 s0, s0 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 @@ -701,6 +704,7 @@ define amdgpu_ps void @insertelement_v_v2i16_v_v(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: v_lshlrev_b32_e32 v2, v1, v2 ; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 ; GFX7-NEXT: v_not_b32_e32 v1, v1 +; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_and_b32_e32 v0, v0, v1 @@ -866,6 +870,7 @@ define amdgpu_ps void @insertelement_v_v4i16_s_s(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: s_lshl_b32 s1, 0xffff, s1 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 ; GFX7-NEXT: s_not_b32 s1, s1 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc @@ -1406,6 +1411,7 @@ define amdgpu_ps void @insertelement_v_v4i16_s_v(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 ; GFX7-NEXT: v_not_b32_e32 v2, v2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v3 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc @@ -1529,6 +1535,7 @@ define amdgpu_ps void @insertelement_v_v4i16_v_s(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: s_lshl_b32 s1, 0xffff, s1 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 ; GFX7-NEXT: s_not_b32 s1, s1 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc @@ -1653,6 +1660,7 @@ define amdgpu_ps void @insertelement_v_v4i16_v_v(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX7-NEXT: v_not_b32_e32 v3, v3 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v4 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc @@ -1976,6 +1984,7 @@ define amdgpu_ps void @insertelement_v_v8i16_s_s(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: s_not_b32 s6, s0 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, 2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 +; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc @@ -2693,6 +2702,7 @@ define amdgpu_ps void @insertelement_v_v8i16_s_v(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0 ; GFX7-NEXT: v_not_b32_e32 v1, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v7, v3, v4, vcc @@ -2846,6 +2856,7 @@ define amdgpu_ps void @insertelement_v_v8i16_v_s(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: s_not_b32 s5, s0 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, 2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 +; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 
v1, v3, v4, vcc @@ -3001,6 +3012,7 @@ define amdgpu_ps void @insertelement_v_v8i16_v_v(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0 ; GFX7-NEXT: v_not_b32_e32 v1, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc @@ -3351,6 +3363,7 @@ define amdgpu_ps void @insertelement_v_v16i16_s_s(ptr addrspace(1) %ptr, i16 inr ; GFX7-NEXT: s_lshl_b32 s1, s1, s0 ; GFX7-NEXT: s_lshl_b32 s0, 0xffff, s0 ; GFX7-NEXT: s_not_b32 s0, s0 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_movrels_b32_e32 v0, v2 @@ -4289,11 +4302,11 @@ define amdgpu_ps void @insertelement_v_v16i16_s_v(ptr addrspace(1) %ptr, i16 inr ; ; GFX7-LABEL: insertelement_v_v16i16_s_v: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b32 s18, 0 -; GFX7-NEXT: s_mov_b32 s19, 0xf000 -; GFX7-NEXT: s_mov_b64 s[16:17], 0 -; GFX7-NEXT: buffer_load_dwordx4 v[3:6], v[0:1], s[16:19], 0 addr64 -; GFX7-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[16:19], 0 addr64 offset:16 +; GFX7-NEXT: s_mov_b32 s14, 0 +; GFX7-NEXT: s_mov_b32 s15, 0xf000 +; GFX7-NEXT: s_mov_b64 s[12:13], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[3:6], v[0:1], s[12:15], 0 addr64 +; GFX7-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:16 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 1, v2 ; GFX7-NEXT: v_and_b32_e32 v1, 1, v2 ; GFX7-NEXT: s_and_b32 s0, s2, 0xffff @@ -4309,7 +4322,7 @@ define amdgpu_ps void @insertelement_v_v16i16_s_v(ptr addrspace(1) %ptr, i16 inr ; GFX7-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0 ; GFX7-NEXT: v_not_b32_e32 v1, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0 -; GFX7-NEXT: s_mov_b32 s18, -1 +; GFX7-NEXT: s_mov_b32 s14, -1 ; GFX7-NEXT: s_waitcnt vmcnt(1) ; GFX7-NEXT: v_cndmask_b32_e32 v11, v3, v4, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v5, s[0:1] @@ -4325,13 +4338,14 @@ define amdgpu_ps void @insertelement_v_v16i16_s_v(ptr addrspace(1) %ptr, i16 inr ; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v11, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v2, v5, v11, s[0:1] ; GFX7-NEXT: v_cndmask_b32_e64 v3, v6, v11, s[2:3] +; GFX7-NEXT: s_mov_b64 s[12:13], 0 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v7, v11, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e64 v5, v8, v11, s[6:7] ; GFX7-NEXT: v_cndmask_b32_e64 v6, v9, v11, s[8:9] ; GFX7-NEXT: v_cndmask_b32_e64 v7, v10, v11, s[10:11] -; GFX7-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GFX7-NEXT: s_mov_b64 s[16:17], 16 -; GFX7-NEXT: buffer_store_dwordx4 v[4:7], off, s[16:19], 0 +; GFX7-NEXT: buffer_store_dwordx4 v[0:3], off, s[12:15], 0 +; GFX7-NEXT: s_mov_b64 s[12:13], 16 +; GFX7-NEXT: buffer_store_dwordx4 v[4:7], off, s[12:15], 0 ; GFX7-NEXT: s_endpgm ; ; GFX10-LABEL: insertelement_v_v16i16_s_v: @@ -4523,6 +4537,7 @@ define amdgpu_ps void @insertelement_v_v16i16_v_s(ptr addrspace(1) %ptr, i16 %va ; GFX7-NEXT: v_lshlrev_b32_e32 v0, s0, v0 ; GFX7-NEXT: s_lshl_b32 s0, 0xffff, s0 ; GFX7-NEXT: s_not_b32 s0, s0 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_movrels_b32_e32 v1, v3 @@ -4686,11 +4701,11 @@ define amdgpu_ps void @insertelement_v_v16i16_v_v(ptr addrspace(1) %ptr, i16 %va ; ; GFX7-LABEL: insertelement_v_v16i16_v_v: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b32 s18, 0 -; GFX7-NEXT: s_mov_b32 s19, 0xf000 -; GFX7-NEXT: s_mov_b64 s[16:17], 0 -; GFX7-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[16:19], 0 addr64 -; GFX7-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], 
s[16:19], 0 addr64 offset:16 +; GFX7-NEXT: s_mov_b32 s14, 0 +; GFX7-NEXT: s_mov_b32 s15, 0xf000 +; GFX7-NEXT: s_mov_b64 s[12:13], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[12:15], 0 addr64 +; GFX7-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[12:15], 0 addr64 offset:16 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 1, v3 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX7-NEXT: v_and_b32_e32 v1, 1, v3 @@ -4706,7 +4721,7 @@ define amdgpu_ps void @insertelement_v_v16i16_v_v(ptr addrspace(1) %ptr, i16 %va ; GFX7-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0 ; GFX7-NEXT: v_not_b32_e32 v1, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0 -; GFX7-NEXT: s_mov_b32 s18, -1 +; GFX7-NEXT: s_mov_b32 s14, -1 ; GFX7-NEXT: s_waitcnt vmcnt(1) ; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[0:1] @@ -4722,13 +4737,14 @@ define amdgpu_ps void @insertelement_v_v16i16_v_v(ptr addrspace(1) %ptr, i16 %va ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v12, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, v12, s[0:1] ; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v12, s[2:3] +; GFX7-NEXT: s_mov_b64 s[12:13], 0 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, v12, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v12, s[6:7] ; GFX7-NEXT: v_cndmask_b32_e64 v6, v10, v12, s[8:9] ; GFX7-NEXT: v_cndmask_b32_e64 v7, v11, v12, s[10:11] -; GFX7-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GFX7-NEXT: s_mov_b64 s[16:17], 16 -; GFX7-NEXT: buffer_store_dwordx4 v[4:7], off, s[16:19], 0 +; GFX7-NEXT: buffer_store_dwordx4 v[0:3], off, s[12:15], 0 +; GFX7-NEXT: s_mov_b64 s[12:13], 16 +; GFX7-NEXT: buffer_store_dwordx4 v[4:7], off, s[12:15], 0 ; GFX7-NEXT: s_endpgm ; ; GFX10-LABEL: insertelement_v_v16i16_v_v: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll index d531462ae9cc96..16b702edff2db9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll @@ -159,6 +159,7 @@ define amdgpu_ps void @insertelement_v_v2i8_s_s(ptr addrspace(1) %ptr, i8 inreg ; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s3, 0 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 @@ -581,6 +582,7 @@ define amdgpu_ps void @insertelement_v_v2i8_s_v(ptr addrspace(1) %ptr, i8 inreg ; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 8, v0 @@ -681,6 +683,7 @@ define amdgpu_ps void @insertelement_v_v2i8_v_s(ptr addrspace(1) %ptr, i8 %val, ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -781,6 +784,7 @@ define amdgpu_ps void @insertelement_v_v2i8_v_v(ptr addrspace(1) %ptr, i8 %val, ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -943,6 +947,7 @@ define amdgpu_ps void 
@insertelement_v_v4i8_s_s(ptr addrspace(1) %ptr, i8 inreg ; GFX7-NEXT: s_lshl_b32 s1, s1, s0 ; GFX7-NEXT: s_lshl_b32 s0, 0xff, s0 ; GFX7-NEXT: s_not_b32 s0, s0 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 @@ -1312,6 +1317,7 @@ define amdgpu_ps void @insertelement_v_v4i8_s_v(ptr addrspace(1) %ptr, i8 inreg ; GFX7-NEXT: v_lshl_b32_e32 v2, s0, v1 ; GFX7-NEXT: v_lshl_b32_e32 v1, 0xff, v1 ; GFX7-NEXT: v_not_b32_e32 v1, v1 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_and_b32_e32 v0, v0, v1 @@ -1405,6 +1411,7 @@ define amdgpu_ps void @insertelement_v_v4i8_v_s(ptr addrspace(1) %ptr, i8 %val, ; GFX7-NEXT: v_lshlrev_b32_e32 v1, s0, v1 ; GFX7-NEXT: s_lshl_b32 s0, 0xff, s0 ; GFX7-NEXT: s_not_b32 s0, s0 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 @@ -1498,6 +1505,7 @@ define amdgpu_ps void @insertelement_v_v4i8_v_v(ptr addrspace(1) %ptr, i8 %val, ; GFX7-NEXT: v_lshlrev_b32_e32 v2, v1, v2 ; GFX7-NEXT: v_lshl_b32_e32 v1, 0xff, v1 ; GFX7-NEXT: v_not_b32_e32 v1, v1 +; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_and_b32_e32 v0, v0, v1 @@ -1741,6 +1749,7 @@ define amdgpu_ps void @insertelement_v_v8i8_s_s(ptr addrspace(1) %ptr, i8 inreg ; GFX7-NEXT: s_lshl_b32 s1, 0xff, s1 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 ; GFX7-NEXT: s_not_b32 s1, s1 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc @@ -2281,6 +2290,7 @@ define amdgpu_ps void @insertelement_v_v8i8_s_v(ptr addrspace(1) %ptr, i8 inreg ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 ; GFX7-NEXT: v_not_b32_e32 v2, v2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v3 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc @@ -2404,6 +2414,7 @@ define amdgpu_ps void @insertelement_v_v8i8_v_s(ptr addrspace(1) %ptr, i8 %val, ; GFX7-NEXT: s_lshl_b32 s1, 0xff, s1 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 ; GFX7-NEXT: s_not_b32 s1, s1 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc @@ -2528,6 +2539,7 @@ define amdgpu_ps void @insertelement_v_v8i8_v_v(ptr addrspace(1) %ptr, i8 %val, ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX7-NEXT: v_not_b32_e32 v3, v3 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v4 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc @@ -2851,6 +2863,7 @@ define amdgpu_ps void @insertelement_v_v16i8_s_s(ptr addrspace(1) %ptr, i8 inreg ; GFX7-NEXT: s_not_b32 s6, s0 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, 2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 +; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc @@ -3568,6 +3581,7 @@ define amdgpu_ps void @insertelement_v_v16i8_s_v(ptr addrspace(1) %ptr, i8 inreg ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0 ; GFX7-NEXT: v_not_b32_e32 v1, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v7, v3, v4, vcc @@ -3721,6 +3735,7 @@ define amdgpu_ps void 
@insertelement_v_v16i8_v_s(ptr addrspace(1) %ptr, i8 %val, ; GFX7-NEXT: s_not_b32 s5, s0 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, 2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 +; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc @@ -3876,6 +3891,7 @@ define amdgpu_ps void @insertelement_v_v16i8_v_v(ptr addrspace(1) %ptr, i8 %val, ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0 ; GFX7-NEXT: v_not_b32_e32 v1, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index 61439021a88757..dc9cbb498dab4d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -687,17 +687,17 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: s_mov_b32 s18, 0 +; GPRIDX-NEXT: s_mov_b32 s16, 0 +; GPRIDX-NEXT: s_mov_b32 s14, 0 +; GPRIDX-NEXT: s_mov_b32 s12, 0 +; GPRIDX-NEXT: s_mov_b32 s8, 0 ; GPRIDX-NEXT: s_mov_b64 s[4:5], 1.0 ; GPRIDX-NEXT: s_mov_b32 s19, 0x40200000 ; GPRIDX-NEXT: s_mov_b32 s17, 0x401c0000 -; GPRIDX-NEXT: s_mov_b32 s16, s18 ; GPRIDX-NEXT: s_mov_b32 s15, 0x40180000 -; GPRIDX-NEXT: s_mov_b32 s14, s18 ; GPRIDX-NEXT: s_mov_b32 s13, 0x40140000 -; GPRIDX-NEXT: s_mov_b32 s12, s18 ; GPRIDX-NEXT: s_mov_b64 s[10:11], 4.0 ; GPRIDX-NEXT: s_mov_b32 s9, 0x40080000 -; GPRIDX-NEXT: s_mov_b32 s8, s18 ; GPRIDX-NEXT: s_mov_b64 s[6:7], 2.0 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s4 ; GPRIDX-NEXT: v_mov_b32_e32 v4, s5 @@ -753,17 +753,17 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_mov_b32 s18, 0 +; GFX10-NEXT: s_mov_b32 s16, 0 +; GFX10-NEXT: s_mov_b32 s14, 0 +; GFX10-NEXT: s_mov_b32 s12, 0 +; GFX10-NEXT: s_mov_b32 s8, 0 ; GFX10-NEXT: s_mov_b64 s[4:5], 1.0 ; GFX10-NEXT: s_mov_b32 s19, 0x40200000 ; GFX10-NEXT: s_mov_b32 s17, 0x401c0000 -; GFX10-NEXT: s_mov_b32 s16, s18 ; GFX10-NEXT: s_mov_b32 s15, 0x40180000 -; GFX10-NEXT: s_mov_b32 s14, s18 ; GFX10-NEXT: s_mov_b32 s13, 0x40140000 -; GFX10-NEXT: s_mov_b32 s12, s18 ; GFX10-NEXT: s_mov_b64 s[10:11], 4.0 ; GFX10-NEXT: s_mov_b32 s9, 0x40080000 -; GFX10-NEXT: s_mov_b32 s8, s18 ; GFX10-NEXT: s_mov_b64 s[6:7], 2.0 ; GFX10-NEXT: v_mov_b32_e32 v3, s4 ; GFX10-NEXT: v_mov_b32_e32 v4, s5 @@ -820,16 +820,16 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s14, 0 ; GFX11-NEXT: s_mov_b32 s15, 0x40200000 +; GFX11-NEXT: s_mov_b32 s12, 0 +; GFX11-NEXT: s_mov_b32 s10, 0 +; GFX11-NEXT: s_mov_b32 s8, 0 +; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_mov_b64 s[0:1], 1.0 ; GFX11-NEXT: s_mov_b32 s13, 0x401c0000 -; GFX11-NEXT: s_mov_b32 s12, s14 ; GFX11-NEXT: s_mov_b32 s11, 0x40180000 -; GFX11-NEXT: s_mov_b32 s10, s14 ; GFX11-NEXT: s_mov_b32 s9, 0x40140000 -; GFX11-NEXT: s_mov_b32 s8, s14 ; GFX11-NEXT: s_mov_b64 s[6:7], 4.0 ; GFX11-NEXT: s_mov_b32 s5, 0x40080000 -; GFX11-NEXT: s_mov_b32 s4, s14 ; GFX11-NEXT: s_mov_b64 s[2:3], 2.0 ; GFX11-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v17, s14 ; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: 
v_dual_mov_b32 v3, s0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir index 2714982163fec8..f1c3673ae29dd3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir @@ -23,6 +23,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -32,6 +33,7 @@ body: | ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -41,6 +43,7 @@ body: | ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} @@ -75,18 +78,17 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4, implicit $exec ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit 
$flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -96,6 +98,7 @@ body: | ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -103,18 +106,17 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4, implicit $exec ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} @@ -153,6 +155,7 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 
implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -162,6 +165,7 @@ body: | ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} @@ -171,6 +175,7 @@ body: | ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} @@ -205,18 +210,17 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4, implicit $exec ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 ; GFX9: liveins: $vgpr0_vgpr1, 
$vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -226,6 +230,7 @@ body: | ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} @@ -233,18 +238,17 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4, implicit $exec ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} @@ -281,18 +285,17 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4, implicit $exec ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = 
COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -300,18 +303,17 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4, implicit $exec ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 
(s32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -319,18 +321,17 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4, implicit $exec ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} @@ -338,17 +339,15 @@ body: | ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4, implicit $exec ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -378,6 +377,7 @@ body: | ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -386,6 +386,7 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -394,6 +395,7 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} @@ -427,6 +429,7 @@ body: | ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -435,6 +438,7 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} @@ -443,6 +447,7 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE 
[[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir index bc131f53910d9b..e1ef96bec0fdba 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir @@ -144,17 +144,15 @@ body: | ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4, implicit $exec ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 @@ -164,17 +162,15 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: 
[[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4, implicit $exec ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 @@ -344,17 +340,15 @@ body: | ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4, implicit $exec ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + 
; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 @@ -364,17 +358,15 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4, implicit $exec ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 @@ -423,22 +415,20 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec - ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4, implicit $exec ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = 
COPY [[V_MOV_B]].sub1 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] ; @@ -449,22 +439,20 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4, implicit $exec ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], 
%subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] ; @@ -475,17 +463,15 @@ body: | ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4, implicit $exec ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX8-LABEL: name: 
amdgpu_atomic_cmpxchg_s32_global_gepm4 @@ -495,17 +481,15 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4, implicit $exec ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 @@ -834,19 +818,17 @@ body: | ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-FLAT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; GFX7-FLAT-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4095 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0 ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1 ; GFX7-FLAT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY3]], [[COPY4]], implicit-def $scc ; GFX7-FLAT-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY5]], [[COPY6]], implicit-def dead $scc, implicit $scc - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 @@ -855,19 +837,17 @@ body: | ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4095 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1 ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY3]], [[COPY4]], implicit-def $scc ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY5]], [[COPY6]], implicit-def dead $scc, implicit $scc - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir index 78071022fc05c2..868f08d805caa4 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir @@ -21,6 +21,7 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -28,6 +29,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -35,6 +37,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -64,18 +67,21 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -102,18 +108,17 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: 
[[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -121,23 +126,23 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2047 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -168,39 +173,38 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; 
GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, 
[[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -229,18 +233,17 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -248,23 +251,23 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec 
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2048 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -295,39 +298,38 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -356,18 +358,17 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -375,23 +376,23 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4095 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -422,39 +423,38 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], 
%subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -483,68 +483,63 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], 
killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4097 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -569,65 +564,60 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec 
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 
= COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -652,6 +642,7 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -659,6 +650,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -666,6 +658,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} @@ -695,18 +688,21 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} @@ -733,18 +729,17 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -752,23 +747,23 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit 
$exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s64_offset4095 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} @@ -799,39 +794,38 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE]], [[COPY1]], 0, 0, 
implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; ; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; ; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; ; GFX11-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir index d3eaee283d6447..43d9b911b9f335 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir @@ -27,6 +27,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] + ; ; GFX7-LABEL: name: global_atomicrmw_add_s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -34,6 +35,7 @@ body: | ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], 
[[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; ; GFX9-LABEL: name: global_atomicrmw_add_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -41,6 +43,7 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; ; GFX10-LABEL: name: global_atomicrmw_add_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -48,6 +51,7 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; ; GFX11-LABEL: name: global_atomicrmw_add_s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -82,24 +86,28 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; ; GFX7-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; ; GFX9-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; ; GFX10-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; ; GFX11-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -133,23 +141,23 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] + ; ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + 
; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec
 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
 ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
+ ;
 ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX9-NEXT: {{ $}}
@@ -157,6 +165,7 @@ body: |
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
 ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
+ ;
 ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX10-NEXT: {{ $}}
@@ -164,6 +173,7 @@ body: |
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
+ ;
 ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2047
 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX11-NEXT: {{ $}}
@@ -200,34 +210,36 @@ body: |
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
 ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
- ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec
 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX11-NEXT: {{ $}}
@@ -263,23 +275,23 @@ body: |
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
 ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
+ ;
 ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2048
 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
- ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec
 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
 ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
+ ;
 ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX9-NEXT: {{ $}}
@@ -287,23 +299,23 @@ body: |
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
 ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
+ ;
 ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec
 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
+ ;
 ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2048
 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX11-NEXT: {{ $}}
@@ -340,44 +352,44 @@ body: |
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
 ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
- ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec
 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec
 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX11-NEXT: {{ $}}
@@ -413,23 +425,23 @@ body: |
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
 ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
+ ;
 ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4095
 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
 ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
+ ;
 ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX9-NEXT: {{ $}}
@@ -437,23 +449,23 @@ body: |
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
 ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
+ ;
 ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
+ ;
 ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4095
 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX11-NEXT: {{ $}}
@@ -490,44 +502,44 @@ body: |
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
 ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX11-NEXT: {{ $}}
@@ -564,73 +576,69 @@ body: |
 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097
 ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
+ ;
 ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4097
 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec
- ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec
 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
 ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
+ ;
 ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec
- ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec
 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
 ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
+ ;
 ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec
 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
+ ;
 ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4097
 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec
 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1)
 ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
@@ -662,70 +670,66 @@ body: |
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097
 ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec
- ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec
 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec
- ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec
 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec
 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ;
 ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4097, implicit $exec
 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1)
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
 %2:vgpr(s64) = G_CONSTANT i64 4097
@@ -755,6 +759,7 @@ body: |
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
 ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]]
+ ;
 ; GFX7-LABEL: name: global_atomicrmw_add_s64
 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX7-NEXT: {{ $}}
@@ -762,6 +767,7 @@ body: |
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1)
 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
+ ;
 ; GFX9-LABEL: name: global_atomicrmw_add_s64
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -769,6 +775,7 @@ body: |
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
+ ;
 ; GFX10-LABEL: name: global_atomicrmw_add_s64
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX10-NEXT: {{ $}}
@@ -776,6 +783,7 @@ body: |
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
+ ;
 ; GFX11-LABEL: name: global_atomicrmw_add_s64
 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX11-NEXT: {{ $}}
@@ -810,24 +818,28 @@ body: |
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
 ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1)
+ ;
 ; GFX7-LABEL: name: global_atomicrmw_add_s64_nortn
 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1)
+ ;
 ; GFX9-LABEL: name: global_atomicrmw_add_s64_nortn
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1)
+ ;
 ; GFX10-LABEL: name: global_atomicrmw_add_s64_nortn
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1)
+ ;
 ; GFX11-LABEL: name: global_atomicrmw_add_s64_nortn
 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX11-NEXT: {{ $}}
@@ -861,23 +873,23 @@ body: |
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
 ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]]
+ ;
 ; GFX7-LABEL: name: global_atomicrmw_add_s64_offset4095
 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1)
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1)
 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
+ ;
 ; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -885,23 +897,23 @@ body: |
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
+ ;
 ; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
+ ;
 ; GFX11-LABEL: name: global_atomicrmw_add_s64_offset4095
 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX11-NEXT: {{ $}}
@@ -938,44 +950,44 @@ body: |
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
 ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1)
+ ;
 ; GFX7-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1)
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1)
+ ;
 ; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1)
+ ;
 ; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1)
+ ;
 ; GFX11-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX11-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
index dadc6f32dfb8cf..ca3fd71f6c9814 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
@@ -136,58 +136,34 @@ tracksRegLiveness: true
 body: |
   bb.0:
 ; WAVE64-LABEL: name: constant_v_s64
- ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE64: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -1, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -54, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 27, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967295, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
 ; WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
+ ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
 ; WAVE64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1
- ; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- ; WAVE64-NEXT: [[V_MOV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; WAVE64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_4]], %subreg.sub0, [[V_MOV_B32_e32_5]], %subreg.sub1
- ; WAVE64-NEXT: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967242, implicit $exec
- ; WAVE64-NEXT: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; WAVE64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1
- ; WAVE64-NEXT: [[V_MOV_B32_e32_8:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
- ; WAVE64-NEXT: [[V_MOV_B32_e32_9:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; WAVE64-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_8]], %subreg.sub0, [[V_MOV_B32_e32_9]], %subreg.sub1
- ; WAVE64-NEXT: [[V_MOV_B32_e32_10:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- ; WAVE64-NEXT: [[V_MOV_B32_e32_11:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; WAVE64-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_10]], %subreg.sub0, [[V_MOV_B32_e32_11]], %subreg.sub1
- ; WAVE64-NEXT: [[V_MOV_B32_e32_12:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; WAVE64-NEXT: [[V_MOV_B32_e32_13:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; WAVE64-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_12]], %subreg.sub0, [[V_MOV_B32_e32_13]], %subreg.sub1
- ; WAVE64-NEXT: [[V_MOV_B32_e32_14:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
- ; WAVE64-NEXT: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
- ; WAVE64-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1
- ; WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]]
+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
 ;
 ; WAVE32-LABEL: name: constant_v_s64
- ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE32: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -1, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -54, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 27, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967295, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
 ; WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
+ ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
 ; WAVE32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1
- ; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- ; WAVE32-NEXT: [[V_MOV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; WAVE32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_4]], %subreg.sub0, [[V_MOV_B32_e32_5]], %subreg.sub1
- ; WAVE32-NEXT: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967242, implicit $exec
- ; WAVE32-NEXT: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; WAVE32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1
- ; WAVE32-NEXT: [[V_MOV_B32_e32_8:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
- ; WAVE32-NEXT: [[V_MOV_B32_e32_9:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; WAVE32-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_8]], %subreg.sub0, [[V_MOV_B32_e32_9]], %subreg.sub1
- ; WAVE32-NEXT: [[V_MOV_B32_e32_10:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- ; WAVE32-NEXT: [[V_MOV_B32_e32_11:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; WAVE32-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_10]], %subreg.sub0, [[V_MOV_B32_e32_11]], %subreg.sub1
- ; WAVE32-NEXT: [[V_MOV_B32_e32_12:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; WAVE32-NEXT: [[V_MOV_B32_e32_13:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- ; WAVE32-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_12]], %subreg.sub0, [[V_MOV_B32_e32_13]], %subreg.sub1
- ; WAVE32-NEXT: [[V_MOV_B32_e32_14:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
- ; WAVE32-NEXT: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
- ; WAVE32-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1
- ; WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]]
+ ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
 %0:vgpr(s64) = G_CONSTANT i64 0
 %1:vgpr(s64) = G_CONSTANT i64 1
 %2:vgpr(s64) = G_CONSTANT i64 -1
@@ -208,42 +184,34 @@ tracksRegLiveness: true
 body: |
   bb.0:
 ; WAVE64-LABEL: name: constant_s_s64
- ; WAVE64: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; WAVE64-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1
- ; WAVE64-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
- ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967242
- ; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; WAVE64: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+ ; WAVE64-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
+ ; WAVE64-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+ ; WAVE64-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
+ ; WAVE64-NEXT: [[S_MOV_B4:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 27
+ ; WAVE64-NEXT: [[S_MOV_B5:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
+ ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
 ; WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; WAVE64-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27
- ; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
- ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
+ ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
 ; WAVE64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
- ; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; WAVE64-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; WAVE64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_4]], %subreg.sub0, [[S_MOV_B32_5]], %subreg.sub1
- ; WAVE64-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
- ; WAVE64-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
- ; WAVE64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1
- ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]]
+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]], implicit [[S_MOV_B4]], implicit [[S_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
 ;
 ; WAVE32-LABEL: name: constant_s_s64
- ; WAVE32: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; WAVE32-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1
- ; WAVE32-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
- ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967242
- ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; WAVE32: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+ ; WAVE32-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
+ ; WAVE32-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+ ; WAVE32-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
+ ; WAVE32-NEXT: [[S_MOV_B4:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 27
+ ; WAVE32-NEXT: [[S_MOV_B5:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
+ ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
 ; WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; WAVE32-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27
- ; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
- ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
+ ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
 ; WAVE32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
- ; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; WAVE32-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; WAVE32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_4]], %subreg.sub0, [[S_MOV_B32_5]], %subreg.sub1
- ; WAVE32-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
- ; WAVE32-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
- ; WAVE32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1
- ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]]
+ ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]], implicit [[S_MOV_B4]], implicit [[S_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
 %0:sgpr(s64) = G_CONSTANT i64 0
 %1:sgpr(s64) = G_CONSTANT i64 1
 %2:sgpr(s64) = G_CONSTANT i64 -1
@@ -351,42 +319,34 @@ tracksRegLiveness: true
 body: |
   bb.0:
 ; WAVE64-LABEL: name: constant_s_p1
- ; WAVE64: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; WAVE64-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1
- ; WAVE64-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
- ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967242
- ; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; WAVE64: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+ ; WAVE64-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
+ ; WAVE64-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+ ; WAVE64-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
+ ; WAVE64-NEXT: [[S_MOV_B4:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 27
+ ; WAVE64-NEXT: [[S_MOV_B5:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
+ ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
 ; WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; WAVE64-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27
- ; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
- ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
+ ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
 ; WAVE64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
- ; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; WAVE64-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; WAVE64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_4]], %subreg.sub0, [[S_MOV_B32_5]], %subreg.sub1
- ; WAVE64-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
- ; WAVE64-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
- ; WAVE64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1
- ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]]
+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]], implicit [[S_MOV_B4]], implicit [[S_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
 ;
 ; WAVE32-LABEL: name: constant_s_p1
-    ; WAVE32: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
-    ; WAVE32-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1
-    ; WAVE32-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
-    ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967242
-    ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+    ; WAVE32: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+    ; WAVE32-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
+    ; WAVE32-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+    ; WAVE32-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
+    ; WAVE32-NEXT: [[S_MOV_B4:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 27
+    ; WAVE32-NEXT: [[S_MOV_B5:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
+    ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
     ; WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; WAVE32-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27
-    ; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
-    ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
+    ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
     ; WAVE32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
-    ; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; WAVE32-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 1
-    ; WAVE32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_4]], %subreg.sub0, [[S_MOV_B32_5]], %subreg.sub1
-    ; WAVE32-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
-    ; WAVE32-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
-    ; WAVE32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1
-    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]]
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]], implicit [[S_MOV_B4]], implicit [[S_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
     %0:sgpr(p1) = G_CONSTANT i64 0
     %1:sgpr(p1) = G_CONSTANT i64 1
     %2:sgpr(p1) = G_CONSTANT i64 -1
@@ -407,58 +367,34 @@ tracksRegLiveness: true
 body: |
   bb.0:
     ; WAVE64-LABEL: name: constant_v_p1
-    ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -1, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -54, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 27, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967295, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
     ; WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
     ; WAVE64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; WAVE64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_4]], %subreg.sub0, [[V_MOV_B32_e32_5]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967242, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; WAVE64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_8:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_9:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE64-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_8]], %subreg.sub0, [[V_MOV_B32_e32_9]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_10:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_11:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE64-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_10]], %subreg.sub0, [[V_MOV_B32_e32_11]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_12:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_13:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE64-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_12]], %subreg.sub0, [[V_MOV_B32_e32_13]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_14:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
-    ; WAVE64-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1
-    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]]
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
     ;
     ; WAVE32-LABEL: name: constant_v_p1
-    ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -1, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -54, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 27, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967295, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
     ; WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
     ; WAVE32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; WAVE32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_4]], %subreg.sub0, [[V_MOV_B32_e32_5]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967242, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; WAVE32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_8:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_9:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE32-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_8]], %subreg.sub0, [[V_MOV_B32_e32_9]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_10:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_11:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE32-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_10]], %subreg.sub0, [[V_MOV_B32_e32_11]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_12:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_13:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE32-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_12]], %subreg.sub0, [[V_MOV_B32_e32_13]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_14:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
-    ; WAVE32-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1
-    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]]
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
     %0:vgpr(p1) = G_CONSTANT i64 0
     %1:vgpr(p1) = G_CONSTANT i64 1
     %2:vgpr(p1) = G_CONSTANT i64 -1
@@ -479,42 +415,34 @@ tracksRegLiveness: true
 body: |
   bb.0:
     ; WAVE64-LABEL: name: constant_s_p999
-    ; WAVE64: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
-    ; WAVE64-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1
-    ; WAVE64-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
-    ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967242
-    ; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+    ; WAVE64: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+    ; WAVE64-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
+    ; WAVE64-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+    ; WAVE64-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
+    ; WAVE64-NEXT: [[S_MOV_B4:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 27
+    ; WAVE64-NEXT: [[S_MOV_B5:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
+    ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
     ; WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; WAVE64-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27
-    ; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
-    ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE64-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
+    ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
     ; WAVE64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
-    ; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; WAVE64-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 1
-    ; WAVE64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_4]], %subreg.sub0, [[S_MOV_B32_5]], %subreg.sub1
-    ; WAVE64-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
-    ; WAVE64-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
-    ; WAVE64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1
-    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]]
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]], implicit [[S_MOV_B4]], implicit [[S_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
     ;
     ; WAVE32-LABEL: name: constant_s_p999
-    ; WAVE32: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
-    ; WAVE32-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1
-    ; WAVE32-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
-    ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967242
-    ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+    ; WAVE32: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+    ; WAVE32-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1
+    ; WAVE32-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+    ; WAVE32-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -54
+    ; WAVE32-NEXT: [[S_MOV_B4:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 27
+    ; WAVE32-NEXT: [[S_MOV_B5:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4294967295
+    ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
     ; WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; WAVE32-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27
-    ; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
-    ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE32-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
+    ; WAVE32-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
     ; WAVE32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
-    ; WAVE32-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; WAVE32-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 1
-    ; WAVE32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_4]], %subreg.sub0, [[S_MOV_B32_5]], %subreg.sub1
-    ; WAVE32-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 23255
-    ; WAVE32-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
-    ; WAVE32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1
-    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]]
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]], implicit [[S_MOV_B4]], implicit [[S_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
     %0:sgpr(p999) = G_CONSTANT i64 0
     %1:sgpr(p999) = G_CONSTANT i64 1
     %2:sgpr(p999) = G_CONSTANT i64 -1
@@ -535,58 +463,34 @@ tracksRegLiveness: true
 body: |
   bb.0:
     ; WAVE64-LABEL: name: constant_v_p999
-    ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -1, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -54, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 27, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967295, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
     ; WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
+    ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
     ; WAVE64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; WAVE64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_4]], %subreg.sub0, [[V_MOV_B32_e32_5]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967242, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; WAVE64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_8:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_9:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE64-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_8]], %subreg.sub0, [[V_MOV_B32_e32_9]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_10:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_11:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE64-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_10]], %subreg.sub0, [[V_MOV_B32_e32_11]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_12:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_13:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE64-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_12]], %subreg.sub0, [[V_MOV_B32_e32_13]], %subreg.sub1
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_14:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
-    ; WAVE64-NEXT: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
-    ; WAVE64-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1
-    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]]
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
     ;
     ; WAVE32-LABEL: name: constant_v_p999
-    ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -1, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -54, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 27, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4294967295, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
     ; WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
+    ; WAVE32-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
     ; WAVE32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; WAVE32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_4]], %subreg.sub0, [[V_MOV_B32_e32_5]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967242, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; WAVE32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_8:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_9:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE32-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_8]], %subreg.sub0, [[V_MOV_B32_e32_9]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_10:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_11:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE32-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_10]], %subreg.sub0, [[V_MOV_B32_e32_11]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_12:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_13:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE32-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_12]], %subreg.sub0, [[V_MOV_B32_e32_13]], %subreg.sub1
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_14:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec
-    ; WAVE32-NEXT: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
-    ; WAVE32-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1
-    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]]
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]], implicit [[V_MOV_B1]], implicit [[V_MOV_B2]], implicit [[V_MOV_B3]], implicit [[V_MOV_B4]], implicit [[V_MOV_B5]], implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]]
     %0:vgpr(p999) = G_CONSTANT i64 0
     %1:vgpr(p999) = G_CONSTANT i64 1
     %2:vgpr(p999) = G_CONSTANT i64 -1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir
index 23b10218cbbe89..2465c374cc11d0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir
@@ -61,21 +61,13 @@ tracksRegLiveness: true
 body: |
   bb.0:
     ; GCN-LABEL: name: fconstant_v_s64
-    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1072693248, implicit $exec
-    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
-    ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; GCN-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1075838976, implicit $exec
-    ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1
-    ; GCN-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; GCN-NEXT: [[V_MOV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1073741824, implicit $exec
-    ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_4]], %subreg.sub0, [[V_MOV_B32_e32_5]], %subreg.sub1
-    ; GCN-NEXT: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; GCN-NEXT: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1076101120, implicit $exec
-    ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1
-    ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]]
-    ; GCN-NEXT: $vgpr2_vgpr3 = COPY [[REG_SEQUENCE1]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]]
+    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4620693217682128896, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4611686018427387904, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4621819117588971520, implicit $exec
+    ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MOV_B]]
+    ; GCN-NEXT: $vgpr2_vgpr3 = COPY [[V_MOV_B1]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B2]], implicit [[V_MOV_B3]]
     %0:vgpr(s64) = G_FCONSTANT double 1.0
     %1:vgpr(s64) = G_FCONSTANT double 8.0
     %2:vgpr(s64) = G_FCONSTANT double -2.0
@@ -95,17 +87,13 @@ tracksRegLiveness: true
 body: |
   bb.0:
     ; GCN-LABEL: name: fconstant_s_s64
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 4607182418800017408
-    ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1075838976
-    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; GCN-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -4611686018427387904
-    ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -1071382528
-    ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
-    ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: $sgpr2_sgpr3 = COPY [[REG_SEQUENCE]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_1]], implicit [[REG_SEQUENCE1]]
+    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4607182418800017408
+    ; GCN-NEXT: [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4620693217682128896
+    ; GCN-NEXT: [[S_MOV_B2:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4611686018427387904
+    ; GCN-NEXT: [[S_MOV_B3:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4601552919265804288
+    ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_MOV_B]]
+    ; GCN-NEXT: $sgpr2_sgpr3 = COPY [[S_MOV_B1]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B]], implicit [[S_MOV_B1]], implicit [[S_MOV_B2]], implicit [[S_MOV_B3]]
     %0:sgpr(s64) = G_FCONSTANT double 1.0
     %1:sgpr(s64) = G_FCONSTANT double 8.0
     %2:sgpr(s64) = G_FCONSTANT double -2.0
@@ -136,7 +124,6 @@ body: |
     %2:vgpr(s32) = G_ANYEXT %0
     %3:vgpr(s32) = G_ANYEXT %1
 
-    ; Test without already assigned register class
     %4:vgpr(s16) = G_FCONSTANT half 1.0
     %5:vgpr(s16) = G_FCONSTANT half 8.0
     $vgpr0 = COPY %2
@@ -168,7 +155,6 @@ body: |
     %2:vgpr(s32) = G_ANYEXT %0
     %3:vgpr(s32) = G_ANYEXT %1
 
-    ; Test without already assigned register class
     %4:sgpr(s16) = G_FCONSTANT half 1.0
     %5:sgpr(s16) = G_FCONSTANT half 8.0
    $sgpr0 = COPY %2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
index 15ece434487ed1..6ccd2a9c3e6785 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
@@ -17,24 +17,21 @@ body: |
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
-    ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %6:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
-    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], %5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
-    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; GCN-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
     %0:sgpr(s32) = COPY $sgpr0
     %1:vgpr(s32) = COPY $vgpr0
     %2:vgpr(s32) = COPY $vgpr1
     %3:vgpr(p1) = COPY $vgpr3_vgpr4
 
-    ; fmul vs
     %4:vgpr(s32) = G_FMUL %1, %0
 
-    ; fmul sv
     %5:vgpr(s32) = G_FMUL %0, %1
 
-    ; fmul vv
     %6:vgpr(s32) = G_FMUL %1, %2
 
     G_STORE %4, %3 :: (store (s32), addrspace 1)
@@ -57,22 +54,19 @@ body: |
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
-    ; GCN-NEXT: %4:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %5:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %6:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_ENDPGM 0, implicit %4, implicit %5, implicit %6
+    ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_F64_e64_]], implicit [[V_MUL_F64_e64_1]], implicit [[V_MUL_F64_e64_2]]
     %0:sgpr(s64) = COPY $sgpr0_sgpr1
     %1:vgpr(s64) = COPY $vgpr0_vgpr1
     %2:vgpr(s64) = COPY $vgpr2_vgpr3
     %3:vgpr(p1) = COPY $vgpr4_vgpr5
 
-    ; fmul vs
     %4:vgpr(s64) = G_FMUL %1, %0
 
-    ; fmul sv
     %5:vgpr(s64) = G_FMUL %0, %1
 
-    ; fmul vv
     %6:vgpr(s64) = G_FMUL %1, %2
 
     S_ENDPGM 0, implicit %4, implicit %5, implicit %6
@@ -92,10 +86,10 @@ body: |
     ; GCN-NEXT: {{ $}}
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN-NEXT: %7:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %8:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %9:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_ENDPGM 0, implicit %7, implicit %8, implicit %9
+    ; GCN-NEXT: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]], implicit [[V_MUL_F16_e64_1]], implicit [[V_MUL_F16_e64_2]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:vgpr(s32) = COPY $vgpr0
     %2:vgpr(s32) = COPY $vgpr1
@@ -105,13 +99,10 @@
     %5:vgpr(s16) = G_TRUNC %1
     %6:vgpr(s16) = G_TRUNC %2
 
-    ; fmul vs
     %8:vgpr(s16) = G_FMUL %4, %4
 
-    ; fmul sv
     %9:vgpr(s16) = G_FMUL %4, %4
 
-    ; fmul vv
     %10:vgpr(s16) = G_FMUL %4, %5
 
     S_ENDPGM 0, implicit %8, implicit %9, implicit %10
@@ -131,26 +122,26 @@ body: |
     ; GCN-NEXT: {{ $}}
     ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
-    ; GCN-NEXT: %6:vgpr_32 = nofpexcept V_MUL_F32_e64 2, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %7:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %8:vgpr_32 = nofpexcept V_MUL_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %9:vgpr_32 = nofpexcept V_MUL_F32_e64 1, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %10:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %11:vgpr_32 = nofpexcept V_MUL_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %12:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %13:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
-    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
-    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
-    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
-    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
-    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %11, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
-    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %12, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
-    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %13, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
-    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %14, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
-    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; GCN-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 2, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 1, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F32_e64_7:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F32_e64_8:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F32_e64_9:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_4]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_5]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_6]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_7]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+    ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(p1) = COPY $vgpr2_vgpr3
@@ -159,37 +150,27 @@
     %4:vgpr(s32) = G_FNEG %0
     %5:vgpr(s32) = G_FNEG %3
 
-    ; fabs lhs
     %6:vgpr(s32) = G_FMUL %3, %0
 
-    ; fabs rhs
     %7:vgpr(s32) = G_FMUL %0, %3
 
-    ; fabs lhs, rhs
     %8:vgpr(s32) = G_FMUL %3, %3
 
-    ; fneg lhs
     %9:vgpr(s32) = G_FMUL %4, %0
 
-    ; fneg rhs
     %10:vgpr(s32) = G_FMUL %0, %4
 
-    ; fneg lhs, rhs
     %11:vgpr(s32) = G_FMUL %4, %4
 
-    ; fneg fabs lhs
     %12:vgpr(s32) = G_FMUL %5, %0
 
-    ; fneg fabs rhs
     %13:vgpr(s32) = G_FMUL %0, %5
 
-    ; fneg fabs lhs, rhs
     %14:vgpr(s32) = G_FMUL %5, %5
 
-    ; fneg fabs lhs, fneg rhs
     %15:vgpr(s32) = G_FMUL %5, %4
 
     G_STORE %6, %2 :: (store (s32), addrspace 1)
@@ -369,10 +350,8 @@ body: |
     ; GCN: liveins: $vgpr0_vgpr1
     ; GCN-NEXT: {{ $}}
     ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1070596096, implicit $exec
-    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
-    ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 3, [[COPY]], 1, [[REG_SEQUENCE]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4598175219545276416, implicit $exec
+    ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 3, [[COPY]], 1, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
     ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MUL_F64_e64_]]
     ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
     %0:vgpr(s64) = COPY $vgpr0_vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
index cda4414a0d90ee..47e5a2f35a5679 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
@@ -20,10 +20,8 @@ body: |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3
     ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (s64), addrspace 1)
-    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
-    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B]]
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
     ; CHECK-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec
     ; CHECK-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec
@@ -62,7 +60,6 @@ body: |
   bb.1:
     liveins: $sgpr0_sgpr1
 
-    ; S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
     ; CHECK-LABEL: name: fract_f64_neg_abs
     ; CHECK: liveins: $sgpr0_sgpr1
     ; CHECK-NEXT: {{ $}}
@@ -71,10 +68,8 @@
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3
     ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (s64), addrspace 1)
-    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
-    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775808
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B]]
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
     ; CHECK-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec
     ; CHECK-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
index c9b1b782658c7f..de21788ff168f7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
@@ -21,18 +21,21 @@ body: |
     ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
     ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ;
     ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
     ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ;
     ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{ $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
     ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ;
     ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst
     ; GFX11: liveins: $vgpr0_vgpr1
     ; GFX11-NEXT: {{ $}}
@@ -62,18 +65,21 @@ body: |
     ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
     ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; GFX9-LABEL: name: load_atomic_flat_v2s16_seq_cst
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
     ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; GFX10-LABEL: name: load_atomic_flat_v2s16_seq_cst
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{ $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>))
     ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ;
     ; GFX11-LABEL: name: load_atomic_flat_v2s16_seq_cst
     ; GFX11: liveins: $vgpr0_vgpr1
     ; GFX11-NEXT: {{ $}}
@@ -103,18 +109,21 @@ body: |
     ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
     ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](p3)
+    ;
     ; GFX9-LABEL: name: load_atomic_flat_p3_seq_cst
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
     ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3)
+    ;
     ; GFX10-LABEL: name: load_atomic_flat_p3_seq_cst
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{ $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3))
     ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3)
+    ;
     ; GFX11-LABEL: name: load_atomic_flat_p3_seq_cst
     ; GFX11: liveins: $vgpr0_vgpr1
     ; GFX11-NEXT: {{ $}}
@@ -144,18 +153,21 @@ body: |
     ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+    ;
     ; GFX9-LABEL: name: load_atomic_flat_s64_seq_cst
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+    ;
     ; GFX10-LABEL: name: load_atomic_flat_s64_seq_cst
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{ $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64))
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+    ;
     ; GFX11-LABEL: name: load_atomic_flat_s64_seq_cst
     ; GFX11: liveins: $vgpr0_vgpr1
     ; GFX11-NEXT: {{ $}}
@@ -185,18 +197,21 @@ body: |
     ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ;
     ; GFX9-LABEL: name: load_atomic_flat_v2s32_seq_cst
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ;
     ; GFX10-LABEL: name: load_atomic_flat_v2s32_seq_cst
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{ $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>))
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ;
     ; GFX11-LABEL: name: load_atomic_flat_v2s32_seq_cst
     ; GFX11: liveins: $vgpr0_vgpr1
     ; GFX11-NEXT: {{ $}}
@@ -226,18 +241,21 @@ body: |
     ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX9-LABEL: name: load_atomic_flat_v4s16_seq_cst
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX10-LABEL: name: load_atomic_flat_v4s16_seq_cst
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{ $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>))
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ;
     ; GFX11-LABEL: name: load_atomic_flat_v4s16_seq_cst
     ; GFX11: liveins: $vgpr0_vgpr1
     ; GFX11-NEXT: {{ $}}
@@ -267,18 +285,21 @@ body: |
     ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ;
     ; GFX9-LABEL: name: load_atomic_flat_p1_seq_cst
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ;
     ; GFX10-LABEL: name: load_atomic_flat_p1_seq_cst
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{ $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1))
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ;
     ; GFX11-LABEL: name: load_atomic_flat_p1_seq_cst
     ; GFX11: liveins: $vgpr0_vgpr1
     ; GFX11-NEXT: {{ $}}
@@ -308,18 +329,21 @@ body: |
     ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
+    ;
     ; GFX9-LABEL: name: load_atomic_flat_p0_seq_cst
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
+    ;
     ; GFX10-LABEL: name: load_atomic_flat_p0_seq_cst
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{ $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
     ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0))
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0)
+    ;
     ; GFX11-LABEL: name: load_atomic_flat_p0_seq_cst
     ; GFX11: liveins: $vgpr0_vgpr1
     ; GFX11-NEXT: {{ $}}
@@ -347,65 +371,60 @@ body: |
     ; GFX7: liveins: $vgpr0_vgpr1
     ; GFX7-NEXT: {{ $}}
     ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
-    ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
     ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
     ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
     ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
     ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
-    ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
-    ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
+    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
     ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ;
     ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
-    ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
     ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
     ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
     ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
-    ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
-    ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
+    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
     ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ;
     ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{ $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
-    ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
     ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
     ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
     ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
     ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
-    ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
-    ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
+    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
     ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ;
     ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
     ; GFX11: liveins: $vgpr0_vgpr1
     ; GFX11-NEXT: {{ $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
-    ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
     ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
     ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
     ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
     ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
-    ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
-    ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
+    ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
     ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = G_CONSTANT i64 -2048
@@ -430,40 +449,39 @@ body: |
     ; GFX7: liveins: $vgpr0_vgpr1
     ; GFX7-NEXT: {{ $}}
     ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
-    ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
     ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
     ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
     ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
     ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
-    ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
-    ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
+    ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
     ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ;
     ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
     ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ;
     ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{ $}}
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
-    ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
     ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
     ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
     ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
     ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
-    ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
-    ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
+    ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+    ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32))
     ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ;
     ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
     ; GFX11: liveins: $vgpr0_vgpr1
     ; GFX11-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
index 2c2e792bce66dd..b678966de85377 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
@@ -28,6 +28,7 @@
     ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
     ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64
[[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} @@ -39,18 +40,21 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -80,24 +84,28 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX7-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX9-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX10-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -127,24 +135,28 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX7-LABEL: name: load_atomic_global_p3_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX7-FLAT-LABEL: name: load_atomic_global_p3_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: 
[[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX9-LABEL: name: load_atomic_global_p3_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX10-LABEL: name: load_atomic_global_p3_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -179,6 +191,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} @@ -190,18 +203,21 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; ; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; ; GFX10-LABEL: name: load_atomic_global_s64_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -231,24 +247,28 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX7-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: 
[[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -278,24 +298,28 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX7-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX7-FLAT-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX9-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX10-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -325,24 +349,28 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX7-LABEL: name: load_atomic_global_p1_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX7-FLAT-LABEL: name: load_atomic_global_p1_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-LABEL: name: load_atomic_global_p1_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX10-LABEL: name: load_atomic_global_p1_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -372,24 +400,28 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; ; GFX7-LABEL: name: load_atomic_global_p0_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; ; GFX7-FLAT-LABEL: name: 
load_atomic_global_p0_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; ; GFX9-LABEL: name: load_atomic_global_p0_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; ; GFX10-LABEL: name: load_atomic_global_p0_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -417,66 +449,64 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = 
COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = 
COPY [[V_MOV_B]].sub1 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -513,6 +543,7 @@ body: | ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} @@ -524,43 +555,42 @@ body: | ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 @@ -585,66 +615,64 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, 
implicit $exec - ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], 
[[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; ; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 
[[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; ; GFX10-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir index 8f17cc3ab47ec1..a53fd81f351a2d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir @@ -1189,11 +1189,11 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; GFX6-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1 ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 @@ -1204,11 +1204,11 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; GFX7-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1 ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 @@ -1219,11 +1219,11 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; GFX8-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY 
[[S_MOV_B64_]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1 ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 @@ -1259,51 +1259,45 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 - ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -524288 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1 ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 - ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -524288 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1 ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, 
[[S_ADDC_U32_]], %subreg.sub1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 - ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -524288 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1 ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_524288 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir index 78812ca1991f91..d7c32543988626 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -23,24 +23,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX8-LABEL: name: load_flat_s32_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX9-LABEL: name: load_flat_s32_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX10-LABEL: name: load_flat_s32_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] 
+ ; ; GFX11-LABEL: name: load_flat_s32_from_4 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -70,24 +74,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; ; GFX8-LABEL: name: load_flat_s32_from_2 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; ; GFX9-LABEL: name: load_flat_s32_from_2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; ; GFX10-LABEL: name: load_flat_s32_from_2 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; ; GFX11-LABEL: name: load_flat_s32_from_2 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -117,24 +125,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX8-LABEL: name: load_flat_s32_from_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX9-LABEL: name: load_flat_s32_from_1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX10-LABEL: name: load_flat_s32_from_1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX11-LABEL: name: load_flat_s32_from_1 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -164,24 +176,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX8-LABEL: name: load_flat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX9-LABEL: name: load_flat_v2s32 ; GFX9: liveins: 
$vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX10-LABEL: name: load_flat_v2s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX11-LABEL: name: load_flat_v2s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -211,24 +227,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; ; GFX8-LABEL: name: load_flat_v3s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; ; GFX9-LABEL: name: load_flat_v3s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; ; GFX10-LABEL: name: load_flat_v3s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; ; GFX11-LABEL: name: load_flat_v3s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -258,24 +278,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; ; GFX8-LABEL: name: load_flat_v4s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; ; GFX9-LABEL: name: load_flat_v4s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; ; GFX10-LABEL: name: load_flat_v4s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit 
$flat_scr :: (load (<4 x s32>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; ; GFX11-LABEL: name: load_flat_v4s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -305,24 +329,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX8-LABEL: name: load_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX9-LABEL: name: load_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX10-LABEL: name: load_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX11-LABEL: name: load_flat_s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -352,24 +380,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; ; GFX8-LABEL: name: load_flat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; ; GFX9-LABEL: name: load_flat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; ; GFX10-LABEL: name: load_flat_v2s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; ; GFX11-LABEL: name: load_flat_v2s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -399,24 +431,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; ; GFX8-LABEL: name: load_flat_v2p1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: 
[[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; ; GFX9-LABEL: name: load_flat_v2p1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; ; GFX10-LABEL: name: load_flat_v2p1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; ; GFX11-LABEL: name: load_flat_v2p1 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -446,24 +482,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; ; GFX8-LABEL: name: load_flat_s96 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; ; GFX9-LABEL: name: load_flat_s96 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; ; GFX10-LABEL: name: load_flat_s96 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; ; GFX11-LABEL: name: load_flat_s96 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -493,24 +533,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; ; GFX8-LABEL: name: load_flat_s128 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; ; GFX9-LABEL: name: load_flat_s128 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; ; GFX10-LABEL: name: load_flat_s128 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; ; GFX11-LABEL: name: load_flat_s128 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -540,24 +584,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) ; GFX7-NEXT: 
$vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX8-LABEL: name: load_flat_p3_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX9-LABEL: name: load_flat_p3_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX10-LABEL: name: load_flat_p3_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX11-LABEL: name: load_flat_p3_from_4 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -587,24 +635,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX8-LABEL: name: load_flat_p1_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX9-LABEL: name: load_flat_p1_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX10-LABEL: name: load_flat_p1_from_8 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX11-LABEL: name: load_flat_p1_from_8 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -634,24 +686,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; ; GFX8-LABEL: name: load_flat_p999_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; ; GFX9-LABEL: name: load_flat_p999_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; ; GFX10-LABEL: name: load_flat_p999_from_8 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = 
G_LOAD [[COPY]](p1) :: (load (p999)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; ; GFX11-LABEL: name: load_flat_p999_from_8 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -681,24 +737,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX8-LABEL: name: load_flat_v2p3 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-LABEL: name: load_flat_v2p3 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX10-LABEL: name: load_flat_v2p3 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX11-LABEL: name: load_flat_v2p3 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -728,24 +788,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX8-LABEL: name: load_flat_v2s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX9-LABEL: name: load_flat_v2s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX10-LABEL: name: load_flat_v2s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; ; GFX11-LABEL: name: load_flat_v2s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -775,24 +839,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX8-LABEL: name: load_flat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX9-LABEL: name: load_flat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; 
GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX10-LABEL: name: load_flat_v4s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; ; GFX11-LABEL: name: load_flat_v4s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -822,24 +890,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; ; GFX8-LABEL: name: load_flat_v6s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; ; GFX9-LABEL: name: load_flat_v6s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; ; GFX10-LABEL: name: load_flat_v6s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; ; GFX11-LABEL: name: load_flat_v6s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -869,24 +941,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; ; GFX8-LABEL: name: load_flat_v8s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; ; GFX9-LABEL: name: load_flat_v8s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; ; GFX10-LABEL: name: load_flat_v8s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; ; GFX11-LABEL: name: load_flat_v8s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -918,56 +994,54 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, 
implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -997,56 +1071,54 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: 
[[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit 
$exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -1076,81 +1148,75 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = 
V_MOV_B32_e32 4294965249, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9-NEXT: $vgpr0 = COPY 
[[FLAT_LOAD_UBYTE]] + ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2047 @@ -1175,81 +1241,75 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = 
V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 @@ -1274,56 +1334,54 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
[[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; 
GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} @@ -1353,81 +1411,75 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: 
[[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE 
[[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], 
[[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4096 @@ -1452,81 +1504,75 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; 
GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4095 @@ -1551,81 +1597,75 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 
= FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4096 @@ -1650,81 +1690,75 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: 
[[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 
[[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8191 @@ -1749,81 +1783,75 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + 
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], 
%subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8192 @@ -1848,81 +1876,75 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = 
COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit 
$exec, implicit $flat_scr :: (load (s8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit 
$flat_scr :: (load (s8)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8191 @@ -1947,81 +1969,75 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], 
[[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = 
COPY [[V_MOV_B]].sub0
 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8))
 ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 -8192
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
index 7ffa65f922456d..0103bfc9d39c14 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
@@ -243,17 +243,15 @@ body: |
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, [[V_ADDC_U32_e64_2]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, [[V_ADDC_U32_e64_2]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
 ;
 ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095
@@ -310,17 +308,15 @@ body: |
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec
 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, [[V_ADDC_U32_e64_2]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, [[V_ADDC_U32_e64_2]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
 ;
 ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
@@ -440,35 +436,31 @@ body: |
 ; GFX9: liveins: $sgpr0_sgpr1
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294963199
- ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX9-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4097
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
 ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
 ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
 ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
 ;
 ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097
 ; GFX10: liveins: $sgpr0_sgpr1
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294963199
- ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4097
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
 ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
 ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
 ;
@@ -476,17 +468,15 @@ body: |
 ; GFX11: liveins: $sgpr0_sgpr1
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294963199
- ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX11-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -4097
 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
 ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
 ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
 ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
 ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
 %0:sgpr(p1) = COPY $sgpr0_sgpr1
@@ -562,17 +552,15 @@ body: |
 ; GFX10: liveins: $sgpr0_sgpr1
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294965247
- ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -2049
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
 ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
 ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
index f26e23293dae79..27806edc918088 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
@@ -1212,34 +1212,30 @@ body: |
 ; GFX7-FLAT: liveins: $vgpr0_vgpr1
 ; GFX7-FLAT-NEXT: {{ $}}
 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec
 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX8-LABEL: name: load_global_s32_from_1_gep_2047
 ; GFX8: liveins: $vgpr0_vgpr1
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
- ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX9-LABEL: name: load_global_s32_from_1_gep_2047
@@ -1309,34 +1305,30 @@ body: |
 ; GFX7-FLAT: liveins: $vgpr0_vgpr1
 ; GFX7-FLAT-NEXT: {{ $}}
 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec
 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX8-LABEL: name: load_global_s32_from_1_gep_2048
 ; GFX8: liveins: $vgpr0_vgpr1
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
- ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX9-LABEL: name: load_global_s32_from_1_gep_2048
@@ -1350,17 +1342,15 @@ body: |
 ; GFX10: liveins: $vgpr0_vgpr1
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2048, implicit $exec
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
 ;
 ; GFX11-LABEL: name: load_global_s32_from_1_gep_2048
@@ -1392,78 +1382,70 @@ body: |
 ; GFX6: liveins: $vgpr0_vgpr1
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
- ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec
 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
 ;
 ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047
 ; GFX7: liveins: $vgpr0_vgpr1
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
- ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
 ;
 ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2047
 ; GFX7-FLAT: liveins: $vgpr0_vgpr1
 ; GFX7-FLAT-NEXT: {{ $}}
 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec
 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047
 ; GFX8: liveins: $vgpr0_vgpr1
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
- ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2047, implicit $exec
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047
@@ -1509,78 +1491,70 @@ body: |
 ; GFX6: liveins: $vgpr0_vgpr1
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
- ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
 ;
 ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048
 ; GFX7: liveins: $vgpr0_vgpr1
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
- ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
 ;
 ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2048
 ; GFX7-FLAT: liveins: $vgpr0_vgpr1
 ; GFX7-FLAT-NEXT: {{ $}}
 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048
 ; GFX8: liveins: $vgpr0_vgpr1
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
- ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -2048, implicit $exec
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048
@@ -1650,34 +1624,30 @@ body: |
 ; GFX7-FLAT: liveins: $vgpr0_vgpr1
 ; GFX7-FLAT-NEXT: {{ $}}
 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX8-LABEL: name: load_global_s32_from_1_gep_4095
 ; GFX8: liveins: $vgpr0_vgpr1
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX9-LABEL: name: load_global_s32_from_1_gep_4095
@@ -1691,17 +1661,15 @@ body: |
 ; GFX10: liveins: $vgpr0_vgpr1
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4095, implicit $exec
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
 ;
 ; GFX11-LABEL: name: load_global_s32_from_1_gep_4095
@@ -1759,85 +1727,75 @@ body: |
 ; GFX7-FLAT: liveins: $vgpr0_vgpr1
 ; GFX7-FLAT-NEXT: {{ $}}
 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX8-LABEL: name: load_global_s32_from_1_gep_4096
 ; GFX8: liveins: $vgpr0_vgpr1
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX9-LABEL: name: load_global_s32_from_1_gep_4096
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
 ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
 ;
 ; GFX10-LABEL: name: load_global_s32_from_1_gep_4096
 ; GFX10: liveins: $vgpr0_vgpr1
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
 ;
 ; GFX11-LABEL: name: load_global_s32_from_1_gep_4096
 ; GFX11: liveins: $vgpr0_vgpr1
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 4096, implicit $exec
 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
 ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
 %0:vgpr(p1) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = G_CONSTANT i64 4096
@@ -1862,78 +1820,70 @@ body: |
 ; GFX6: liveins: $vgpr0_vgpr1
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
- ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
 ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
 ;
 ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095
 ; GFX7: liveins: $vgpr0_vgpr1
 ; GFX7-NEXT: {{ $}}
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
- ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
+ ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1)
 ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
 ;
 ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4095
 ; GFX7-FLAT: liveins: $vgpr0_vgpr1
 ; GFX7-FLAT-NEXT: {{ $}}
 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
- ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095
 ; GFX8: liveins: $vgpr0_vgpr1
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
- ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1)
 ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
 ;
 ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095
@@ -1947,17 +1897,15 @@ body: |
 ; GFX10: liveins: $vgpr0_vgpr1
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
- ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4095, implicit $exec
 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
- ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
 ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
 ;
 ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4095
@@ -1989,78 +1937,70 @@ body: |
 ; GFX6: liveins: $vgpr0_vgpr1
 ; GFX6-NEXT: {{ $}}
 ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
- ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec
 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0
 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1
 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
 ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
- ;
GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX7-FLAT-NEXT: 
[[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit 
$flat_scr :: (load (s8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096 @@ -2074,17 +2014,15 @@ body: | ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -4096, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4096 @@ -2142,85 +2080,75 @@ body: | ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], 
%subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: 
[[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8191, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8191 @@ -2271,85 +2199,75 @@ body: | ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], 
%subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = 
V_MOV_B32_e32 8192, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 8192, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; 
GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8192 @@ -2374,129 +2292,115 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec - ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr 
:: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed 
[[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8191, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8191 @@ -2521,129 +2425,115 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec - ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 
0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec - ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX9-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ 
$}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec - ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO -8192, implicit $exec ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8192 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir index edace19c47b161..ad53a2fd811202 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir @@ -193,7 +193,7 @@ body: | # Test a load of an offset from a constant base address # GCN-LABEL: name: constant_address_positive{{$}} -# GCN: %0:sreg_64 = S_MOV_B64 44 +# GCN: %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 44 # VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load (s32), addrspace 4) # SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load (s32), addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir index 1a20e55958742e..3f020aaa0365c7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir @@ -314,8 +314,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0 + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B]], implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 0 @@ -441,7 
+441,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -2 + ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 @@ -468,7 +468,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -4 + ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -4 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 @@ -495,7 +495,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -8 + ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 @@ -522,7 +522,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -16 + ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 @@ -549,9 +549,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3758096384 - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64_IMM_PSEUDO -536870912 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 @@ -776,9 +774,7 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967294, implicit $exec - ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; CHECK-NEXT: %const:vreg_64 = V_MOV_B64_PSEUDO -2, implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 @@ -805,9 +801,7 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec - ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; CHECK-NEXT: %const:vreg_64 = V_MOV_B64_PSEUDO -4, implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 @@ -834,9 +828,7 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec - ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; CHECK-NEXT: %const:vreg_64 = V_MOV_B64_PSEUDO -4, implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 @@ -863,9 +855,7 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967280, implicit $exec - ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; CHECK-NEXT: %const:vreg_64 = V_MOV_B64_PSEUDO -16, implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 @@ -892,9 +882,7 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3758096384, implicit $exec - ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; CHECK-NEXT: %const:vreg_64 = V_MOV_B64_PSEUDO -536870912, implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir index c7520de936aa84..fc6925ee5709c5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -22,24 +22,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; ; GFX8-LABEL: name: store_flat_s32_to_4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; ; GFX9-LABEL: name: store_flat_s32_to_4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; ; GFX10-LABEL: name: store_flat_s32_to_4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 
= COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; ; GFX11-LABEL: name: store_flat_s32_to_4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -68,24 +72,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; ; GFX8-LABEL: name: store_flat_s32_to_2 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; ; GFX9-LABEL: name: store_flat_s32_to_2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; ; GFX10-LABEL: name: store_flat_s32_to_2 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; ; GFX11-LABEL: name: store_flat_s32_to_2 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -114,24 +122,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; ; GFX8-LABEL: name: store_flat_s32_to_1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; ; GFX9-LABEL: name: store_flat_s32_to_1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; ; GFX10-LABEL: name: store_flat_s32_to_1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; ; GFX11-LABEL: name: store_flat_s32_to_1 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -161,24 +173,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; ; GFX8-LABEL: name: store_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; ; GFX9-LABEL: name: 
store_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; ; GFX10-LABEL: name: store_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; ; GFX11-LABEL: name: store_flat_s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} @@ -207,24 +223,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 ; GFX7-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) + ; ; GFX8-LABEL: name: store_flat_s96 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) + ; ; GFX9-LABEL: name: store_flat_s96 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) + ; ; GFX10-LABEL: name: store_flat_s96 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) + ; ; GFX11-LABEL: name: store_flat_s96 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} @@ -253,24 +273,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) + ; ; GFX8-LABEL: name: store_flat_s128 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) + ; ; GFX9-LABEL: name: store_flat_s128 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) + ; ; GFX10-LABEL: name: store_flat_s128 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) + ; ; GFX11-LABEL: name: store_flat_s128 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} @@ -300,24 +324,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 
implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; ; GFX8-LABEL: name: store_flat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; ; GFX9-LABEL: name: store_flat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; ; GFX10-LABEL: name: store_flat_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; ; GFX11-LABEL: name: store_flat_v2s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} @@ -346,24 +374,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 ; GFX7-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; ; GFX8-LABEL: name: store_flat_v3s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; ; GFX9-LABEL: name: store_flat_v3s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; ; GFX10-LABEL: name: store_flat_v3s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; ; GFX11-LABEL: name: store_flat_v3s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} @@ -392,24 +424,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; ; GFX8-LABEL: name: store_flat_v4s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; ; GFX9-LABEL: name: store_flat_v4s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY 
$vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; ; GFX10-LABEL: name: store_flat_v4s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; ; GFX11-LABEL: name: store_flat_v4s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} @@ -439,24 +475,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; ; GFX8-LABEL: name: store_flat_v2s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; ; GFX9-LABEL: name: store_flat_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; ; GFX10-LABEL: name: store_flat_v2s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; ; GFX11-LABEL: name: store_flat_v2s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -486,24 +526,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; ; GFX8-LABEL: name: store_flat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; ; GFX9-LABEL: name: store_flat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; ; GFX10-LABEL: name: store_flat_v4s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; ; GFX11-LABEL: name: store_flat_v4s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} @@ -533,24 +577,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX7-NEXT: G_STORE 
[[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) + ; ; GFX8-LABEL: name: store_flat_v6s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) + ; ; GFX9-LABEL: name: store_flat_v6s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) + ; ; GFX10-LABEL: name: store_flat_v6s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) + ; ; GFX11-LABEL: name: store_flat_v6s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} @@ -579,24 +627,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; ; GFX8-LABEL: name: store_flat_v8s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; ; GFX9-LABEL: name: store_flat_v8s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; ; GFX10-LABEL: name: store_flat_v8s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; ; GFX11-LABEL: name: store_flat_v8s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} @@ -626,24 +678,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; ; GFX8-LABEL: name: store_flat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; ; GFX9-LABEL: name: store_flat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY 
$vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; ; GFX10-LABEL: name: store_flat_v2s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; ; GFX11-LABEL: name: store_flat_v2s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} @@ -673,24 +729,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; ; GFX8-LABEL: name: store_flat_p1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; ; GFX9-LABEL: name: store_flat_p1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; ; GFX10-LABEL: name: store_flat_p1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; ; GFX11-LABEL: name: store_flat_p1 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} @@ -720,24 +780,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>)) + ; ; GFX8-LABEL: name: store_flat_v2p1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>)) + ; ; GFX9-LABEL: name: store_flat_v2p1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>)) + ; ; GFX10-LABEL: name: store_flat_v2p1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>)) + ; ; GFX11-LABEL: name: store_flat_v2p1 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} @@ -767,24 +831,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit 
$exec, implicit $flat_scr :: (store (p3)) + ; ; GFX8-LABEL: name: store_flat_p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; ; GFX9-LABEL: name: store_flat_p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; ; GFX10-LABEL: name: store_flat_p3 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; ; GFX11-LABEL: name: store_flat_p3 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -814,24 +882,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 ; GFX7-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>)) + ; ; GFX8-LABEL: name: store_flat_v2p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>)) + ; ; GFX9-LABEL: name: store_flat_v2p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>)) + ; ; GFX10-LABEL: name: store_flat_v2p3 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 ; GFX10-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>)) + ; ; GFX11-LABEL: name: store_flat_v2p3 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} @@ -860,24 +932,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; ; GFX8-LABEL: name: store_atomic_flat_s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; ; GFX9-LABEL: name: store_atomic_flat_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; ; GFX10-LABEL: name: store_atomic_flat_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 
0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; ; GFX11-LABEL: name: store_atomic_flat_s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -907,24 +983,28 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; ; GFX8-LABEL: name: store_atomic_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; ; GFX9-LABEL: name: store_atomic_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; ; GFX10-LABEL: name: store_atomic_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; ; GFX11-LABEL: name: store_atomic_flat_s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} @@ -953,55 +1033,53 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; ; GFX8-LABEL: name: store_flat_s32_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY 
$vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; ; GFX9-LABEL: name: store_flat_s32_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; ; GFX10-LABEL: name: store_flat_s32_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX10-NEXT: FLAT_STORE_DWORD 
[[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; ; GFX11-LABEL: name: store_flat_s32_gep_2047 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir index c56ba70b667d96..95e01aa413a374 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir @@ -1168,34 +1168,30 @@ body: | ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; ; GFX8-LABEL: name: store_global_s32_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 2047, implicit $exec ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub0 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; 
GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; ; GFX9-LABEL: name: store_global_s32_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll index 13f78852071051..ce27598f69b3f1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll @@ -1638,21 +1638,21 @@ define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) % define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %out) #0 { ; GFX7-LABEL: test_div_scale_f64_val_undef_val: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX7-NEXT: s_mov_b32 s3, 0x40200000 -; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[0:1], s[2:3] +; GFX7-NEXT: v_mov_b32_e32 v0, 0 +; GFX7-NEXT: v_mov_b32_e32 v1, 0x40200000 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], s[0:1], s[0:1], v[0:1] ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_scale_f64_val_undef_val: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_mov_b32 s2, 0 -; GFX8-NEXT: s_mov_b32 s3, 0x40200000 -; GFX8-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[0:1], s[2:3] +; GFX8-NEXT: v_mov_b32_e32 v0, 0 +; GFX8-NEXT: v_mov_b32_e32 v1, 0x40200000 +; GFX8-NEXT: v_div_scale_f64 v[0:1], s[2:3], s[0:1], s[0:1], v[0:1] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v3, s1 @@ -1662,22 +1662,18 @@ define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %ou ; ; GFX10-LABEL: test_div_scale_f64_val_undef_val: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mov_b32 s2, 0 -; GFX10-NEXT: s_mov_b32 s3, 0x40200000 -; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[0:1], s[0:1], s[2:3] +; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[0:1], s[0:1], 0x40200000 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: test_div_scale_f64_val_undef_val: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_mov_b32 s2, 0 -; GFX11-NEXT: s_mov_b32 s3, 0x40200000 -; GFX11-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-NEXT: v_div_scale_f64 v[0:1], null, s[0:1], s[0:1], s[2:3] +; GFX11-NEXT: 
v_div_scale_f64 v[0:1], null, s[0:1], s[0:1], 0x40200000 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_nop 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll index 02d1a92c693739..4ab963ed85ccad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll @@ -25,11 +25,8 @@ define i32 @global_atomic_csub_offset(ptr addrspace(1) %ptr, i32 %data) { ; GFX10-LABEL: global_atomic_csub_offset: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b64 s[4:5], 0x1000 -; GFX10-NEXT: v_mov_b32_e32 v3, s4 -; GFX10-NEXT: v_mov_b32_e32 v4, s5 -; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -37,12 +34,8 @@ define i32 @global_atomic_csub_offset(ptr addrspace(1) %ptr, i32 %data) { ; GFX11-LABEL: global_atomic_csub_offset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b64 s[0:1], 0x1000 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0 -; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -73,11 +66,8 @@ define void @global_atomic_csub_offset_nortn(ptr addrspace(1) %ptr, i32 %data) { ; GFX10-LABEL: global_atomic_csub_offset_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b64 s[4:5], 0x1000 -; GFX10-NEXT: v_mov_b32_e32 v3, s4 -; GFX10-NEXT: v_mov_b32_e32 v4, s5 -; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: global_atomic_csub v0, v[0:1], v2, off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -85,12 +75,8 @@ define void @global_atomic_csub_offset_nortn(ptr addrspace(1) %ptr, i32 %data) { ; GFX11-LABEL: global_atomic_csub_offset_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b64 s[0:1], 0x1000 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0 -; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll index aecfbe7aa22606..3ccedb0733d51d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll @@ -317,31 +317,31 @@ bb: define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, double %a, double %b) #0 { ; GCN-LABEL: test_mfma_f64_16x16x4f64_imm: ; GCN: ; %bb.0: ; %bb -; GCN-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x24 -; GCN-NEXT: s_mov_b64 s[4:5], 0 -; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GCN-NEXT: s_mov_b64 s[10:11], 1.0 -; GCN-NEXT: s_mov_b64 s[6:7], s[4:5] +; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24 +; GCN-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x34 +; GCN-NEXT: s_mov_b64 s[0:1], 0 +; GCN-NEXT: s_mov_b64 s[6:7], 1.0 +; GCN-NEXT: s_mov_b64 s[2:3], s[0:1] ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_pk_mov_b32 v[0:1], s[14:15], s[14:15] op_sel:[0,1] -; GCN-NEXT: s_mov_b64 s[8:9], s[4:5] -; GCN-NEXT: v_accvgpr_write_b32 a0, s4 -; GCN-NEXT: v_accvgpr_write_b32 a1, s5 -; GCN-NEXT: v_accvgpr_write_b32 a2, s6 -; GCN-NEXT: v_accvgpr_write_b32 a3, s7 -; GCN-NEXT: v_accvgpr_write_b32 a4, s8 -; GCN-NEXT: v_accvgpr_write_b32 a5, s9 -; GCN-NEXT: v_accvgpr_write_b32 a6, s10 -; GCN-NEXT: v_accvgpr_write_b32 a7, s11 -; GCN-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] +; GCN-NEXT: v_pk_mov_b32 v[0:1], s[10:11], s[10:11] op_sel:[0,1] +; GCN-NEXT: s_mov_b64 s[4:5], s[0:1] +; GCN-NEXT: v_accvgpr_write_b32 a0, s0 +; GCN-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1] +; GCN-NEXT: v_accvgpr_write_b32 a1, s1 +; GCN-NEXT: v_accvgpr_write_b32 a2, s2 +; GCN-NEXT: v_accvgpr_write_b32 a3, s3 +; GCN-NEXT: v_accvgpr_write_b32 a4, s4 +; GCN-NEXT: v_accvgpr_write_b32 a5, s5 +; GCN-NEXT: v_accvgpr_write_b32 a6, s6 +; GCN-NEXT: v_accvgpr_write_b32 a7, s7 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 0 -; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[12:13] -; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[12:13] offset:16 +; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[8:9] +; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[8:9] offset:16 ; GCN-NEXT: s_endpgm bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> , i32 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll index 0ec4f64b38a1ba..a4d5fe4ffa5a75 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll @@ -12,11 +12,12 @@ define amdgpu_cs void @memmove_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src ; LOOP-NEXT: s_xor_b64 s[4:5], exec, s[0:1] ; LOOP-NEXT: s_cbranch_execz .LBB0_3 ; LOOP-NEXT: ; %bb.1: ; %copy_forward -; LOOP-NEXT: s_mov_b64 s[0:1], 0 +; LOOP-NEXT: s_mov_b64 s[6:7], 0 ; LOOP-NEXT: s_mov_b32 s2, 0 ; LOOP-NEXT: s_mov_b32 s3, 0xf000 -; LOOP-NEXT: v_mov_b32_e32 v5, s1 -; LOOP-NEXT: v_mov_b32_e32 v4, s0 +; LOOP-NEXT: s_mov_b64 s[0:1], 0 +; LOOP-NEXT: v_mov_b32_e32 v4, s6 +; LOOP-NEXT: v_mov_b32_e32 v5, s7 ; LOOP-NEXT: .LBB0_2: ; %copy_forward_loop ; LOOP-NEXT: ; =>This Inner Loop Header: Depth=1 ; LOOP-NEXT: v_add_i32_e32 v6, vcc, v2, v4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll 
index 7cd3babc709093..3edd2e0914a6ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll @@ -7,11 +7,12 @@ declare void @llvm.memset.p1.i32(ptr addrspace(1), i8, i32, i1) define amdgpu_cs void @memset_p1i8(ptr addrspace(1) %dst, i8 %val) { ; LOOP-LABEL: memset_p1i8: ; LOOP: ; %bb.0: ; %loadstoreloop.preheader -; LOOP-NEXT: s_mov_b64 s[0:1], 0 +; LOOP-NEXT: s_mov_b64 s[4:5], 0 ; LOOP-NEXT: s_mov_b32 s2, 0 ; LOOP-NEXT: s_mov_b32 s3, 0xf000 -; LOOP-NEXT: v_mov_b32_e32 v4, s1 -; LOOP-NEXT: v_mov_b32_e32 v3, s0 +; LOOP-NEXT: s_mov_b64 s[0:1], 0 +; LOOP-NEXT: v_mov_b32_e32 v3, s4 +; LOOP-NEXT: v_mov_b32_e32 v4, s5 ; LOOP-NEXT: .LBB0_1: ; %loadstoreloop ; LOOP-NEXT: ; =>This Inner Loop Header: Depth=1 ; LOOP-NEXT: v_add_i32_e32 v5, vcc, v0, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index 4248f7b6a15831..9dacdbc46be194 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -2259,12 +2259,13 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-LABEL: v_sdiv_i64_pow2_shl_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b64 s[4:5], 0x1000 -; CHECK-NEXT: v_lshl_b64 v[5:6], s[4:5], v2 -; CHECK-NEXT: v_mov_b32_e32 v4, v1 ; CHECK-NEXT: v_mov_b32_e32 v3, v0 -; CHECK-NEXT: v_or_b32_e32 v1, v4, v6 +; CHECK-NEXT: v_mov_b32_e32 v4, v1 +; CHECK-NEXT: v_mov_b32_e32 v0, 0x1000 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: v_lshl_b64 v[5:6], v[0:1], v2 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_or_b32_e32 v1, v4, v6 ; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc @@ -2717,161 +2718,163 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-LABEL: v_sdiv_v2i64_pow2_shl_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b64 s[6:7], 0x1000 -; CGP-NEXT: v_mov_b32_e32 v5, v2 -; CGP-NEXT: v_mov_b32_e32 v7, v3 -; CGP-NEXT: v_lshl_b64 v[2:3], s[6:7], v4 +; CGP-NEXT: s_mov_b64 s[4:5], 0x1000 +; CGP-NEXT: v_lshl_b64 v[11:12], s[4:5], v4 ; CGP-NEXT: v_mov_b32_e32 v9, v1 ; CGP-NEXT: v_mov_b32_e32 v8, v0 -; CGP-NEXT: v_or_b32_e32 v1, v9, v3 +; CGP-NEXT: v_or_b32_e32 v1, v9, v12 ; CGP-NEXT: v_mov_b32_e32 v0, 0 +; CGP-NEXT: v_mov_b32_e32 v5, v2 +; CGP-NEXT: v_mov_b32_e32 v7, v3 +; CGP-NEXT: v_mov_b32_e32 v2, 0x1000 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; CGP-NEXT: v_mov_b32_e32 v3, 0 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_2 ; CGP-NEXT: ; %bb.1: -; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v3 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v0 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v0, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v1, v0 -; CGP-NEXT: v_xor_b32_e32 v1, v3, v0 -; CGP-NEXT: v_cvt_f32_u32_e32 v3, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, v1 -; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v2 -; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v1, vcc -; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 -; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 -; CGP-NEXT: v_trunc_f32_e32 v10, v4 -; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v10 -; CGP-NEXT: v_cvt_u32_f32_e32 v12, v3 -; CGP-NEXT: 
v_cvt_u32_f32_e32 v15, v10 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v12, 0 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v13, v15, v[4:5] -; CGP-NEXT: v_mul_lo_u32 v4, v15, v3 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v12, v[10:11] -; CGP-NEXT: v_mul_hi_u32 v11, v12, v3 -; CGP-NEXT: v_mul_hi_u32 v3, v15, v3 -; CGP-NEXT: v_mul_lo_u32 v16, v12, v10 -; CGP-NEXT: v_mul_lo_u32 v17, v15, v10 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v16 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v16, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v17, v3 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 -; CGP-NEXT: v_mul_hi_u32 v10, v15, v10 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v3 -; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v4, vcc -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v12, 0 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v13, v15, v[4:5] -; CGP-NEXT: v_ashrrev_i32_e32 v13, 31, v9 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v13 -; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v12, v[10:11] -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v13, vcc -; CGP-NEXT: v_xor_b32_e32 v11, v4, v13 -; CGP-NEXT: v_mul_lo_u32 v4, v15, v3 -; CGP-NEXT: v_mul_lo_u32 v9, v12, v10 -; CGP-NEXT: v_xor_b32_e32 v14, v8, v13 -; CGP-NEXT: v_mul_hi_u32 v8, v12, v3 -; CGP-NEXT: v_mul_hi_u32 v3, v15, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v15, v10 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v12, v10 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v3 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_hi_u32 v9, v15, v10 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3 -; CGP-NEXT: v_addc_u32_e32 v4, vcc, v15, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v14, v3 -; CGP-NEXT: v_mul_lo_u32 v9, v11, v4 -; CGP-NEXT: v_mul_hi_u32 v10, v11, v3 -; CGP-NEXT: v_mul_hi_u32 v3, v14, v3 -; CGP-NEXT: v_mul_hi_u32 v12, v14, v4 +; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v12 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v12, v0, vcc +; CGP-NEXT: v_xor_b32_e32 v4, v1, v0 +; CGP-NEXT: v_xor_b32_e32 v1, v10, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v10, v4 +; CGP-NEXT: v_cvt_f32_u32_e32 v11, v1 +; CGP-NEXT: v_sub_i32_e32 v14, vcc, 0, v4 +; CGP-NEXT: v_subb_u32_e32 v15, vcc, 0, v1, vcc +; CGP-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 +; CGP-NEXT: v_rcp_iflag_f32_e32 v10, v10 +; CGP-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 +; CGP-NEXT: v_mul_f32_e32 v11, 0x2f800000, v10 +; CGP-NEXT: v_trunc_f32_e32 v12, v11 +; CGP-NEXT: v_mac_f32_e32 v10, 0xcf800000, v12 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v10 +; CGP-NEXT: v_cvt_u32_f32_e32 v16, v12 +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], 
v14, v13, 0 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[11:12] +; CGP-NEXT: v_mul_hi_u32 v17, v13, v10 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] +; CGP-NEXT: v_mul_lo_u32 v12, v16, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 +; CGP-NEXT: v_mul_lo_u32 v18, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v19, v16, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17 +; CGP-NEXT: v_mul_hi_u32 v17, v13, v11 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v18, v12 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v19, v10 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; CGP-NEXT: v_mul_hi_u32 v11, v16, v11 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v10 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, v16, v11, vcc +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[11:12] +; CGP-NEXT: v_ashrrev_i32_e32 v14, 31, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] +; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v14, vcc +; CGP-NEXT: v_xor_b32_e32 v12, v8, v14 +; CGP-NEXT: v_mul_lo_u32 v8, v16, v10 +; CGP-NEXT: v_mul_lo_u32 v15, v13, v11 +; CGP-NEXT: v_xor_b32_e32 v17, v9, v14 +; CGP-NEXT: v_mul_hi_u32 v9, v13, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v10, v14, v4 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_mul_hi_u32 v9, v11, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v3 +; CGP-NEXT: v_mul_lo_u32 v9, v16, v11 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8 +; CGP-NEXT: v_mul_hi_u32 v15, v13, v11 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; CGP-NEXT: v_mul_hi_u32 v11, v16, v11 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v3, v8 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v10, 0 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v8 -; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v2, v12, v[4:5] -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v11, v3 -; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v1, v10, v[8:9] -; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v14, v8, vcc -; CGP-NEXT: v_sub_i32_e64 v8, s[4:5], v14, v8 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1 -; CGP-NEXT: v_subb_u32_e32 v8, vcc, v8, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v2 -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v2 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v1 -; CGP-NEXT: 
v_subbrev_u32_e32 v8, vcc, 0, v8, vcc -; CGP-NEXT: v_cndmask_b32_e64 v4, v9, v11, s[4:5] -; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v10 -; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v1 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 -; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v1 -; CGP-NEXT: v_cndmask_b32_e32 v1, v14, v2, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v9 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v11, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v13, v8 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, v16, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v17, v8 +; CGP-NEXT: v_mul_lo_u32 v11, v12, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v12, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v17, v8 +; CGP-NEXT: v_mul_hi_u32 v15, v17, v9 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v13, v17, v9 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v11, v12, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v13, v8 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v8, v10 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v13, 0 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v10 +; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v11, v[9:10] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v12, v8 +; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v1, v13, v[9:10] +; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v17, v9, vcc +; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v17, v9 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v1 +; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v4 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v4 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v1 +; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_cndmask_b32_e64 v10, v12, v15, s[4:5] +; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v13 +; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v11, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v1 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v1 +; CGP-NEXT: v_cndmask_b32_e32 v1, v16, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v12 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v15, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v1, v9, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc -; CGP-NEXT: v_xor_b32_e32 v3, v13, v0 -; CGP-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc -; CGP-NEXT: v_xor_b32_e32 v0, v1, v3 -; CGP-NEXT: v_xor_b32_e32 v1, v2, v3 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc -; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 +; CGP-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v15, v8, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc +; CGP-NEXT: v_xor_b32_e32 v8, v14, v0 +; CGP-NEXT: 
v_cndmask_b32_e32 v4, v11, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v0, v1, v8 +; CGP-NEXT: v_xor_b32_e32 v1, v4, v8 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc +; CGP-NEXT: ; implicit-def: $vgpr11_vgpr12 ; CGP-NEXT: ; implicit-def: $vgpr8 ; CGP-NEXT: .LBB8_2: ; %Flow1 -; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[8:9] -; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6 -; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: v_lshl_b64 v[9:10], v[2:3], v6 +; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] ; CGP-NEXT: s_cbranch_execz .LBB8_4 ; CGP-NEXT: ; %bb.3: -; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v11 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v11 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 @@ -2879,19 +2882,19 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 -; CGP-NEXT: v_mul_lo_u32 v1, v0, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v11 +; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v1 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v2 -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v11 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v11 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v11 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CGP-NEXT: v_mov_b32_e32 v1, 0 ; CGP-NEXT: .LBB8_4: -; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_b64 exec, exec, s[6:7] ; CGP-NEXT: v_or_b32_e32 v3, v7, v10 ; CGP-NEXT: v_mov_b32_e32 v2, 0 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index d0c55c69f50877..d1599ac489a5f5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -2219,12 +2219,13 @@ define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-LABEL: v_srem_i64_pow2_shl_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b64 s[4:5], 0x1000 -; CHECK-NEXT: v_lshl_b64 v[5:6], s[4:5], v2 -; CHECK-NEXT: v_mov_b32_e32 v4, v1 ; CHECK-NEXT: v_mov_b32_e32 v3, v0 -; CHECK-NEXT: v_or_b32_e32 v1, v4, v6 +; CHECK-NEXT: v_mov_b32_e32 v4, v1 +; CHECK-NEXT: v_mov_b32_e32 v0, 0x1000 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: v_lshl_b64 v[5:6], v[0:1], v2 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_or_b32_e32 v1, v4, v6 ; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc @@ -2671,159 +2672,164 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-LABEL: v_srem_v2i64_pow2_shl_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b64 s[6:7], 0x1000 -; CGP-NEXT: v_mov_b32_e32 v5, v2 -; CGP-NEXT: v_mov_b32_e32 v7, v3 -; CGP-NEXT: 
v_lshl_b64 v[2:3], s[6:7], v4 +; CGP-NEXT: s_mov_b64 s[4:5], 0x1000 +; CGP-NEXT: v_lshl_b64 v[11:12], s[4:5], v4 ; CGP-NEXT: v_mov_b32_e32 v9, v1 ; CGP-NEXT: v_mov_b32_e32 v8, v0 -; CGP-NEXT: v_or_b32_e32 v1, v9, v3 +; CGP-NEXT: v_or_b32_e32 v1, v9, v12 ; CGP-NEXT: v_mov_b32_e32 v0, 0 +; CGP-NEXT: v_mov_b32_e32 v5, v2 +; CGP-NEXT: v_mov_b32_e32 v7, v3 +; CGP-NEXT: v_mov_b32_e32 v2, 0x1000 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; CGP-NEXT: v_mov_b32_e32 v3, 0 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_2 ; CGP-NEXT: ; %bb.1: -; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v3 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v1 -; CGP-NEXT: v_addc_u32_e32 v2, vcc, v3, v1, vcc +; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v12 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v11, v1 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v12, v1, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v0, v1 -; CGP-NEXT: v_xor_b32_e32 v1, v2, v1 -; CGP-NEXT: v_cvt_f32_u32_e32 v2, v0 -; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1 -; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v0 -; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v1, vcc -; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 -; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 -; CGP-NEXT: v_trunc_f32_e32 v4, v3 -; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 -; CGP-NEXT: v_cvt_u32_f32_e32 v10, v2 +; CGP-NEXT: v_xor_b32_e32 v1, v4, v1 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v10, v1 +; CGP-NEXT: v_sub_i32_e32 v14, vcc, 0, v0 +; CGP-NEXT: v_subb_u32_e32 v15, vcc, 0, v1, vcc +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v10 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v4 +; CGP-NEXT: v_trunc_f32_e32 v12, v10 +; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v12 ; CGP-NEXT: v_cvt_u32_f32_e32 v13, v4 -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v10, 0 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v13, v[3:4] -; CGP-NEXT: v_mul_hi_u32 v14, v10, v2 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v10, v[3:4] -; CGP-NEXT: v_mul_lo_u32 v4, v13, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v13, v2 -; CGP-NEXT: v_mul_lo_u32 v15, v10, v3 -; CGP-NEXT: v_mul_lo_u32 v16, v13, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 -; CGP-NEXT: v_mul_hi_u32 v14, v10, v3 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v16, v2 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v14, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v2 -; CGP-NEXT: v_addc_u32_e32 v13, vcc, v13, v3, vcc -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v10, 0 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v13, v[3:4] -; CGP-NEXT: v_ashrrev_i32_e32 v11, 31, v9 -; CGP-NEXT: v_mul_hi_u32 v14, v10, v2 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v10, v[3:4] -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v11 -; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v11, vcc -; CGP-NEXT: 
v_xor_b32_e32 v9, v4, v11 -; CGP-NEXT: v_mul_lo_u32 v4, v13, v2 -; CGP-NEXT: v_mul_lo_u32 v12, v10, v3 -; CGP-NEXT: v_mul_hi_u32 v2, v13, v2 -; CGP-NEXT: v_xor_b32_e32 v8, v8, v11 +; CGP-NEXT: v_cvt_u32_f32_e32 v16, v12 +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0 +; CGP-NEXT: v_mov_b32_e32 v4, v11 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5] +; CGP-NEXT: v_mul_lo_u32 v4, v16, v10 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] +; CGP-NEXT: v_mul_hi_u32 v12, v13, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 +; CGP-NEXT: v_mul_lo_u32 v17, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v18, v16, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; CGP-NEXT: v_mul_hi_u32 v12, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v14, v13, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; CGP-NEXT: v_mul_hi_u32 v12, v10, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v14, v2 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v17, v4 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v18, v10 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v10, v2 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v13, v3, vcc -; CGP-NEXT: v_mul_lo_u32 v4, v8, v2 -; CGP-NEXT: v_mul_lo_u32 v10, v9, v3 -; CGP-NEXT: v_mul_hi_u32 v12, v9, v2 -; CGP-NEXT: v_mul_hi_u32 v2, v8, v2 -; CGP-NEXT: v_mul_hi_u32 v13, v8, v3 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v8, v3 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12 +; CGP-NEXT: v_mul_hi_u32 v11, v16, v11 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_mul_hi_u32 v10, v9, v3 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v2, v4 -; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v12, 0 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v4 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, v16, v10, vcc +; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0 +; CGP-NEXT: v_mov_b32_e32 v4, v11 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5] +; CGP-NEXT: v_ashrrev_i32_e32 v14, 31, v9 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v14 +; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v14, vcc +; CGP-NEXT: v_xor_b32_e32 v12, v4, v14 +; CGP-NEXT: v_mul_lo_u32 v4, v16, v10 +; CGP-NEXT: v_mul_lo_u32 v9, v13, v11 +; CGP-NEXT: v_xor_b32_e32 v15, v8, v14 +; CGP-NEXT: v_mul_hi_u32 v8, v13, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 ; CGP-NEXT: 
v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_mul_lo_u32 v8, v16, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CGP-NEXT: v_mul_hi_u32 v9, v13, v11 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v10, v16, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4] -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 -; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v12, v[3:4] -; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v8, v3, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v8, v3 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v1 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v0 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v1 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v9, vcc, v2, v0 -; CGP-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v1 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v0 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v16, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v15, v4 +; CGP-NEXT: v_mul_lo_u32 v10, v12, v8 +; CGP-NEXT: v_mul_hi_u32 v11, v12, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v15, v4 +; CGP-NEXT: v_mul_hi_u32 v13, v15, v8 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v15, v8 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v10, v12, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v4, v9 +; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v11, 0 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v4 +; CGP-NEXT: v_mov_b32_e32 v4, v9 +; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v0, v10, v[4:5] +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v12, v8 +; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v1, v11, v[9:10] +; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v15, v9, vcc +; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v15, v9 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v0 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v1 +; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v11, vcc, v4, v0 +; CGP-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v9, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v1 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v1 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v9, v0 -; CGP-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[4:5] +; CGP-NEXT: 
v_cmp_ge_u32_e64 s[4:5], v11, v0 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v1 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v9, v1, vcc +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v0 +; CGP-NEXT: v_cndmask_b32_e64 v13, v13, v15, s[4:5] ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 -; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; CGP-NEXT: v_xor_b32_e32 v0, v0, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v11 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v11, vcc -; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 +; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v12, v1, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; CGP-NEXT: v_xor_b32_e32 v0, v0, v14 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v14 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v14 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v14, vcc +; CGP-NEXT: ; implicit-def: $vgpr11_vgpr12 ; CGP-NEXT: ; implicit-def: $vgpr8 ; CGP-NEXT: .LBB8_2: ; %Flow1 -; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[8:9] -; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6 +; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: v_lshl_b64 v[9:10], v[2:3], v6 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_4 ; CGP-NEXT: ; %bb.3: -; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v11 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v11 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 @@ -2831,13 +2837,13 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 -; CGP-NEXT: v_mul_lo_u32 v0, v0, v2 +; CGP-NEXT: v_mul_lo_u32 v0, v0, v11 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v8, v0 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v2 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v11 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v11 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v2 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v11 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v11 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; CGP-NEXT: v_mov_b32_e32 v1, 0 ; CGP-NEXT: .LBB8_4: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll index 77737b356ff6e9..b3b57e14cb3fb5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -1070,11 +1070,12 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_mov_b32_e32 v3, v0 ; CHECK-NEXT: v_mov_b32_e32 v4, v1 -; CHECK-NEXT: s_mov_b64 s[4:5], 0x1000 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: v_lshl_b64 v[5:6], s[4:5], v2 -; CHECK-NEXT: v_or_b32_e32 v1, v4, v6 -; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; CHECK-NEXT: v_mov_b32_e32 v0, 0x1000 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: v_mov_b32_e32 v7, 0 +; CHECK-NEXT: 
v_lshl_b64 v[5:6], v[0:1], v2 +; CHECK-NEXT: v_or_b32_e32 v8, v4, v6 +; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[7:8] ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v5 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc @@ -1509,20 +1510,22 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mov_b32_e32 v9, v1 ; CGP-NEXT: v_mov_b32_e32 v5, v2 ; CGP-NEXT: v_mov_b32_e32 v7, v3 -; CGP-NEXT: s_mov_b64 s[6:7], 0x1000 +; CGP-NEXT: s_mov_b64 s[4:5], 0x1000 +; CGP-NEXT: v_mov_b32_e32 v10, 0x1000 +; CGP-NEXT: v_mov_b32_e32 v11, 0 ; CGP-NEXT: v_mov_b32_e32 v0, 0 -; CGP-NEXT: v_lshl_b64 v[2:3], s[6:7], v4 +; CGP-NEXT: v_lshl_b64 v[2:3], s[4:5], v4 ; CGP-NEXT: v_or_b32_e32 v1, v9, v3 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_2 ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2 -; CGP-NEXT: v_subb_u32_e32 v10, vcc, 0, v3, vcc +; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v0 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v4 ; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 @@ -1531,105 +1534,105 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v4 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v11, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v12, v1, v0 -; CGP-NEXT: v_mul_lo_u32 v13, v10, v0 -; CGP-NEXT: v_mul_hi_u32 v14, v1, v0 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_mul_lo_u32 v13, v4, v12 -; CGP-NEXT: v_mul_hi_u32 v15, v0, v12 -; CGP-NEXT: v_mul_hi_u32 v12, v4, v12 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; CGP-NEXT: v_mul_lo_u32 v14, v0, v11 -; CGP-NEXT: v_mul_lo_u32 v16, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v17, v0, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v4, v11 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v16, v12 +; CGP-NEXT: v_mul_lo_u32 v13, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v14, v1, v0 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v0 +; CGP-NEXT: v_mul_hi_u32 v16, v1, v0 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: v_mul_lo_u32 v15, v4, v14 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v14 +; CGP-NEXT: v_mul_hi_u32 v14, v4, v14 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; CGP-NEXT: v_mul_lo_u32 v16, v0, v13 +; CGP-NEXT: v_mul_lo_u32 v18, v4, v13 +; CGP-NEXT: v_mul_hi_u32 v19, v0, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v4, v13 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v18, v14 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 +; CGP-NEXT: 
v_addc_u32_e32 v4, vcc, v4, v13, vcc +; CGP-NEXT: v_mul_lo_u32 v13, v1, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v12, v0 +; CGP-NEXT: v_mul_hi_u32 v14, v1, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v15, v4, v13 +; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v4, v13 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14 +; CGP-NEXT: v_mul_lo_u32 v12, v0, v1 +; CGP-NEXT: v_mul_lo_u32 v14, v4, v1 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v4, v1 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v15 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v11, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v0 -; CGP-NEXT: v_mul_lo_u32 v10, v10, v0 -; CGP-NEXT: v_mul_hi_u32 v12, v1, v0 -; CGP-NEXT: v_mul_lo_u32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v13, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v14, v0, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v4, v11 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v1 -; CGP-NEXT: v_mul_lo_u32 v12, v4, v1 -; CGP-NEXT: v_mul_hi_u32 v15, v0, v1 -; CGP-NEXT: v_mul_hi_u32 v1, v4, v1 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc ; CGP-NEXT: v_mul_lo_u32 v4, v9, v0 -; CGP-NEXT: v_mul_hi_u32 v10, v8, v0 +; CGP-NEXT: v_mul_hi_u32 v12, v8, v0 ; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_mul_lo_u32 v11, v8, v1 -; CGP-NEXT: v_mul_lo_u32 v12, v9, v1 -; CGP-NEXT: v_mul_hi_u32 v13, v8, v1 +; CGP-NEXT: v_mul_lo_u32 v13, v8, v1 +; CGP-NEXT: v_mul_lo_u32 v14, v9, v1 +; CGP-NEXT: v_mul_hi_u32 v15, v8, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v9, v1 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v12, v0 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 
1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v15 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_mul_lo_u32 v10, v2, v0 -; CGP-NEXT: v_mul_lo_u32 v11, v3, v0 -; CGP-NEXT: v_mul_hi_u32 v12, v2, v0 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 +; CGP-NEXT: v_mul_lo_u32 v12, v2, v0 +; CGP-NEXT: v_mul_lo_u32 v13, v3, v0 +; CGP-NEXT: v_mul_hi_u32 v14, v2, v0 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 ; CGP-NEXT: v_mul_lo_u32 v4, v2, v1 -; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v0 -; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v13 -; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v14, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v9, v4, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v0 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 +; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v15 +; CGP-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v12 +; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v9, v4, vcc ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v9, v4 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v3 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v3 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] ; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v3, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v3 -; CGP-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v3 +; CGP-NEXT: v_cndmask_b32_e32 v9, v14, v9, vcc ; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v2 ; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2 @@ -1639,8 +1642,8 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 ; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v13, v11, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v14, v15, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v15, v13, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v16, v17, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc @@ -1648,9 +1651,9 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 ; CGP-NEXT: .LBB8_2: ; %Flow1 -; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[8:9] -; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6 -; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: v_lshl_b64 v[9:10], v[10:11], v6 +; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] ; CGP-NEXT: s_cbranch_execz .LBB8_4 ; CGP-NEXT: ; %bb.3: ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v4 @@ -1673,7 +1676,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; 
CGP-NEXT: v_mov_b32_e32 v1, 0 ; CGP-NEXT: .LBB8_4: -; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_b64 exec, exec, s[6:7] ; CGP-NEXT: v_or_b32_e32 v3, v7, v10 ; CGP-NEXT: v_mov_b32_e32 v2, 0 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index 097f6642cbc669..ecf7cc921886c0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -1576,11 +1576,12 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_mov_b32_e32 v3, v0 ; CHECK-NEXT: v_mov_b32_e32 v4, v1 -; CHECK-NEXT: s_mov_b64 s[4:5], 0x1000 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: v_lshl_b64 v[5:6], s[4:5], v2 -; CHECK-NEXT: v_or_b32_e32 v1, v4, v6 -; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; CHECK-NEXT: v_mov_b32_e32 v0, 0x1000 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: v_mov_b32_e32 v7, 0 +; CHECK-NEXT: v_lshl_b64 v[5:6], v[0:1], v2 +; CHECK-NEXT: v_or_b32_e32 v8, v4, v6 +; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[7:8] ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v5 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc @@ -2010,20 +2011,22 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mov_b32_e32 v9, v1 ; CGP-NEXT: v_mov_b32_e32 v5, v2 ; CGP-NEXT: v_mov_b32_e32 v7, v3 -; CGP-NEXT: s_mov_b64 s[6:7], 0x1000 +; CGP-NEXT: s_mov_b64 s[4:5], 0x1000 +; CGP-NEXT: v_mov_b32_e32 v10, 0x1000 +; CGP-NEXT: v_mov_b32_e32 v11, 0 ; CGP-NEXT: v_mov_b32_e32 v0, 0 -; CGP-NEXT: v_lshl_b64 v[2:3], s[6:7], v4 +; CGP-NEXT: v_lshl_b64 v[2:3], s[4:5], v4 ; CGP-NEXT: v_or_b32_e32 v1, v9, v3 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_2 ; CGP-NEXT: ; %bb.1: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2 -; CGP-NEXT: v_subb_u32_e32 v10, vcc, 0, v3, vcc +; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v0 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v4 ; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 @@ -2032,92 +2035,92 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v4 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v11, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v12, v1, v0 -; CGP-NEXT: v_mul_lo_u32 v13, v10, v0 -; CGP-NEXT: v_mul_hi_u32 v14, v1, v0 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_mul_lo_u32 v13, v4, v12 -; CGP-NEXT: v_mul_hi_u32 v15, v0, v12 -; CGP-NEXT: v_mul_hi_u32 v12, v4, v12 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; CGP-NEXT: v_mul_lo_u32 v14, v0, v11 -; CGP-NEXT: v_mul_lo_u32 v16, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v17, v0, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v4, v11 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v16, v12 +; CGP-NEXT: v_mul_lo_u32 v13, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v14, v1, v0 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v0 +; CGP-NEXT: v_mul_hi_u32 v16, v1, v0 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: v_mul_lo_u32 v15, v4, v14 +; CGP-NEXT: v_mul_hi_u32 v17, v0, 
v14 +; CGP-NEXT: v_mul_hi_u32 v14, v4, v14 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; CGP-NEXT: v_mul_lo_u32 v16, v0, v13 +; CGP-NEXT: v_mul_lo_u32 v18, v4, v13 +; CGP-NEXT: v_mul_hi_u32 v19, v0, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v4, v13 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v18, v14 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 +; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc +; CGP-NEXT: v_mul_lo_u32 v13, v1, v0 +; CGP-NEXT: v_mul_lo_u32 v12, v12, v0 +; CGP-NEXT: v_mul_hi_u32 v14, v1, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v15, v4, v13 +; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v4, v13 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14 +; CGP-NEXT: v_mul_lo_u32 v12, v0, v1 +; CGP-NEXT: v_mul_lo_u32 v14, v4, v1 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v1 +; CGP-NEXT: v_mul_hi_u32 v1, v4, v1 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v15 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v11, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v0 -; CGP-NEXT: v_mul_lo_u32 v10, v10, v0 -; CGP-NEXT: v_mul_hi_u32 v12, v1, v0 -; CGP-NEXT: v_mul_lo_u32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v13, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v14, v0, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v4, v11 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v1 -; CGP-NEXT: v_mul_lo_u32 v12, v4, v1 -; CGP-NEXT: v_mul_hi_u32 v15, v0, v1 -; CGP-NEXT: v_mul_hi_u32 v1, v4, v1 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, 
v11 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc ; CGP-NEXT: v_mul_lo_u32 v4, v9, v0 -; CGP-NEXT: v_mul_hi_u32 v10, v8, v0 +; CGP-NEXT: v_mul_hi_u32 v12, v8, v0 ; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 -; CGP-NEXT: v_mul_lo_u32 v11, v8, v1 -; CGP-NEXT: v_mul_lo_u32 v12, v9, v1 -; CGP-NEXT: v_mul_hi_u32 v13, v8, v1 +; CGP-NEXT: v_mul_lo_u32 v13, v8, v1 +; CGP-NEXT: v_mul_lo_u32 v14, v9, v1 +; CGP-NEXT: v_mul_hi_u32 v15, v8, v1 ; CGP-NEXT: v_mul_hi_u32 v1, v9, v1 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v12, v0 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v15 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 -; CGP-NEXT: v_mul_lo_u32 v10, v2, v0 -; CGP-NEXT: v_mul_lo_u32 v11, v3, v0 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 +; CGP-NEXT: v_mul_lo_u32 v12, v2, v0 +; CGP-NEXT: v_mul_lo_u32 v13, v3, v0 ; CGP-NEXT: v_mul_hi_u32 v0, v2, v0 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 ; CGP-NEXT: v_mul_lo_u32 v1, v2, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v10 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v12 ; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v9, v0, vcc ; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v9, v0 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2 @@ -2128,19 +2131,19 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 ; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc ; CGP-NEXT: v_sub_i32_e32 v9, vcc, v1, v2 -; CGP-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v0, vcc +; CGP-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v0, vcc ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v2 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] ; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v10, v3 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v3 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 ; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v3 -; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v11, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v3 +; CGP-NEXT: v_cndmask_b32_e32 v3, v14, v13, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v0, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v0, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc @@ -2148,8 +2151,8 @@ define <2 x i64> 
@v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 ; CGP-NEXT: .LBB8_2: ; %Flow1 -; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[8:9] -; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6 +; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: v_lshl_b64 v[9:10], v[10:11], v6 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB8_4 ; CGP-NEXT: ; %bb.3: diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll index 8c4483fc118dbb..bf5843ea8047d5 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -691,12 +691,12 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 ; GFX90A-NEXT: s_sub_i32 s4, 0, s3 ; GFX90A-NEXT: v_mov_b32_e32 v19, 0 -; GFX90A-NEXT: v_pk_mov_b32 v[2:3], 0, 0 -; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v1, v0 -; GFX90A-NEXT: v_cvt_f32_f16_e32 v0, s9 -; GFX90A-NEXT: v_readfirstlane_b32 s10, v1 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], 0, 0 +; GFX90A-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v3, v2 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v2, s9 +; GFX90A-NEXT: v_readfirstlane_b32 s10, v3 ; GFX90A-NEXT: s_mul_i32 s4, s4, s10 ; GFX90A-NEXT: s_mul_hi_u32 s4, s10, s4 ; GFX90A-NEXT: s_add_i32 s10, s10, s4 @@ -713,7 +713,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX90A-NEXT: s_cselect_b32 s4, s10, s4 ; GFX90A-NEXT: s_lshr_b32 s9, s9, 16 ; GFX90A-NEXT: s_lshl_b64 s[12:13], s[4:5], 5 -; GFX90A-NEXT: v_cvt_f32_f16_e32 v1, s9 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v3, s9 ; GFX90A-NEXT: s_lshl_b64 s[2:3], s[0:1], 5 ; GFX90A-NEXT: s_lshl_b64 s[10:11], s[6:7], 5 ; GFX90A-NEXT: s_or_b32 s10, s10, 28 @@ -737,7 +737,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX90A-NEXT: s_cbranch_scc0 .LBB3_10 ; GFX90A-NEXT: ; %bb.3: ; %bb14 ; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1 -; GFX90A-NEXT: global_load_dwordx2 v[4:5], v[2:3], off +; GFX90A-NEXT: global_load_dwordx2 v[4:5], v[0:1], off ; GFX90A-NEXT: v_cmp_gt_i64_e64 s[0:1], s[6:7], -1 ; GFX90A-NEXT: s_mov_b32 s9, s8 ; GFX90A-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[0:1] @@ -795,7 +795,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX90A-NEXT: v_cvt_f32_f16_e32 v22, v21 ; GFX90A-NEXT: v_cvt_f32_f16_sdwa v21, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX90A-NEXT: v_cvt_f32_f16_e32 v20, v20 -; GFX90A-NEXT: v_pk_add_f32 v[24:25], v[0:1], v[14:15] +; GFX90A-NEXT: v_pk_add_f32 v[24:25], v[2:3], v[14:15] ; GFX90A-NEXT: v_pk_add_f32 v[26:27], v[14:15], 0 op_sel_hi:[1,0] ; GFX90A-NEXT: v_pk_add_f32 v[16:17], v[22:23], v[16:17] ; GFX90A-NEXT: v_pk_add_f32 v[14:15], v[20:21], v[14:15] diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index d342c4ffa37b01..2b444e5e0e1f3b 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -7910,8 +7910,7 @@ define amdgpu_kernel void @urem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 ; GFX6-NEXT: s_mov_b32 s1, s5 -; 
GFX6-NEXT: s_mov_b64 s[4:5], 0x1000 -; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], s8 +; GFX6-NEXT: s_lshl_b64 s[4:5], 0x1000, s8 ; GFX6-NEXT: s_add_u32 s4, s4, -1 ; GFX6-NEXT: s_addc_u32 s5, s5, -1 ; GFX6-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] @@ -7923,10 +7922,9 @@ define amdgpu_kernel void @urem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x34 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b64 s[0:1], 0x1000 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 +; GFX9-NEXT: s_lshl_b64 s[0:1], 0x1000, s2 ; GFX9-NEXT: s_add_u32 s0, s0, -1 ; GFX9-NEXT: s_addc_u32 s1, s1, -1 ; GFX9-NEXT: s_and_b64 s[0:1], s[6:7], s[0:1] @@ -8000,45 +7998,43 @@ define amdgpu_kernel void @urem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; ; GFX6-LABEL: urem_v2i64_pow2_shl_denom: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0xd -; GFX6-NEXT: s_mov_b64 s[12:13], 0x1000 -; GFX6-NEXT: s_mov_b32 s11, 0xf000 -; GFX6-NEXT: s_mov_b32 s10, -1 +; GFX6-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshl_b64 s[6:7], s[12:13], s6 -; GFX6-NEXT: s_lshl_b64 s[4:5], s[12:13], s4 -; GFX6-NEXT: s_add_u32 s4, s4, -1 -; GFX6-NEXT: s_addc_u32 s5, s5, -1 -; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] -; GFX6-NEXT: s_add_u32 s4, s6, -1 -; GFX6-NEXT: s_addc_u32 s5, s7, -1 -; GFX6-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5] -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: v_mov_b32_e32 v2, s2 -; GFX6-NEXT: v_mov_b32_e32 v3, s3 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 +; GFX6-NEXT: s_lshl_b64 s[10:11], 0x1000, s10 +; GFX6-NEXT: s_lshl_b64 s[8:9], 0x1000, s8 +; GFX6-NEXT: s_add_u32 s8, s8, -1 +; GFX6-NEXT: s_addc_u32 s9, s9, -1 +; GFX6-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] +; GFX6-NEXT: s_add_u32 s8, s10, -1 +; GFX6-NEXT: s_addc_u32 s9, s11, -1 +; GFX6-NEXT: s_and_b64 s[6:7], s[6:7], s[8:9] +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: v_mov_b32_e32 v2, s6 +; GFX6-NEXT: v_mov_b32_e32 v3, s7 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; GFX9-LABEL: urem_v2i64_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b64 s[2:3], 0x1000 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshl_b64 s[10:11], s[2:3], s10 -; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 +; GFX9-NEXT: s_lshl_b64 s[2:3], 0x1000, s10 +; GFX9-NEXT: s_lshl_b64 s[8:9], 0x1000, s8 +; GFX9-NEXT: s_add_u32 s8, s8, -1 +; GFX9-NEXT: s_addc_u32 s9, s9, -1 +; GFX9-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX9-NEXT: s_add_u32 s2, s2, -1 ; GFX9-NEXT: s_addc_u32 s3, s3, -1 -; GFX9-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3] -; GFX9-NEXT: s_add_u32 s4, s10, -1 -; GFX9-NEXT: s_addc_u32 s5, s11, -1 -; GFX9-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: v_mov_b32_e32 v3, s5 +; GFX9-NEXT: s_and_b64 s[2:3], s[6:7], s[2:3] +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s2 +; 
GFX9-NEXT: v_mov_b32_e32 v3, s3 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm %shl.y = shl <2 x i64> , %y @@ -8297,12 +8293,11 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; ; GFX6-LABEL: sdiv_i64_pow2_shl_denom: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xd -; GFX6-NEXT: s_mov_b64 s[2:3], 0x1000 +; GFX6-NEXT: s_load_dword s2, s[0:1], 0xd ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 +; GFX6-NEXT: s_lshl_b64 s[2:3], 0x1000, s2 ; GFX6-NEXT: s_ashr_i32 s8, s3, 31 ; GFX6-NEXT: s_add_u32 s2, s2, s8 ; GFX6-NEXT: s_mov_b32 s9, s8 @@ -8334,17 +8329,16 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; GFX6-NEXT: v_mul_hi_u32 v3, v0, v4 ; GFX6-NEXT: v_mul_lo_u32 v5, v0, v2 -; GFX6-NEXT: v_mul_hi_u32 v6, v0, v2 -; GFX6-NEXT: v_mul_hi_u32 v7, v1, v2 -; GFX6-NEXT: v_mul_lo_u32 v2, v1, v2 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc +; GFX6-NEXT: v_mul_hi_u32 v7, v0, v2 ; GFX6-NEXT: v_mul_lo_u32 v6, v1, v4 ; GFX6-NEXT: v_mul_hi_u32 v4, v1, v4 -; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[12:13] +; GFX6-NEXT: v_mul_hi_u32 v8, v1, v2 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc +; GFX6-NEXT: v_mul_lo_u32 v2, v1, v2 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v5, v4, vcc -; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v7, vcc +; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v8, vcc ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 @@ -8352,6 +8346,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: v_mul_lo_u32 v2, s4, v1 ; GFX6-NEXT: v_mul_hi_u32 v3, s4, v0 ; GFX6-NEXT: v_mul_lo_u32 v4, s5, v0 +; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[12:13] ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GFX6-NEXT: v_mul_lo_u32 v3, s4, v0 @@ -8433,10 +8428,9 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: s_endpgm ; GFX9-LABEL: sdiv_i64_pow2_shl_denom: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s4, s[0:1], 0x34 -; GFX9-NEXT: s_mov_b64 s[2:3], 0x1000 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x34 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshl_b64 s[4:5], s[2:3], s4 +; GFX9-NEXT: s_lshl_b64 s[4:5], 0x1000, s2 ; GFX9-NEXT: s_ashr_i32 s2, s5, 31 ; GFX9-NEXT: s_add_u32 s4, s4, s2 ; GFX9-NEXT: s_mov_b32 s3, s2 @@ -8462,12 +8456,12 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX9-NEXT: s_mul_hi_u32 s14, s0, s11 ; GFX9-NEXT: s_mul_i32 s13, s1, s11 ; GFX9-NEXT: s_add_i32 s12, s14, s12 -; GFX9-NEXT: s_add_i32 s12, s12, s13 ; GFX9-NEXT: s_mul_i32 s15, s0, s11 +; GFX9-NEXT: s_add_i32 s12, s12, s13 +; GFX9-NEXT: s_mul_hi_u32 s14, s11, s15 ; GFX9-NEXT: s_mul_hi_u32 s13, s11, s12 -; GFX9-NEXT: s_mul_i32 s14, s11, s12 -; GFX9-NEXT: s_mul_hi_u32 s11, s11, s15 -; GFX9-NEXT: s_add_u32 s11, s11, s14 +; GFX9-NEXT: s_mul_i32 s11, s11, s12 +; GFX9-NEXT: s_add_u32 s11, s14, s11 ; GFX9-NEXT: s_addc_u32 s13, 0, s13 ; GFX9-NEXT: s_mul_hi_u32 s16, s10, s15 ; GFX9-NEXT: s_mul_i32 s15, s10, s15 @@ -8884,10 +8878,9 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX6-LABEL: sdiv_v2i64_pow2_shl_denom: 
; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd -; GFX6-NEXT: s_mov_b64 s[2:3], 0x1000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshl_b64 s[12:13], s[2:3], s10 -; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 +; GFX6-NEXT: s_lshl_b64 s[2:3], 0x1000, s8 +; GFX6-NEXT: s_lshl_b64 s[12:13], 0x1000, s10 ; GFX6-NEXT: s_ashr_i32 s14, s3, 31 ; GFX6-NEXT: s_add_u32 s2, s2, s14 ; GFX6-NEXT: s_mov_b32 s15, s14 @@ -8920,15 +8913,15 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX6-NEXT: v_mul_hi_u32 v3, v0, v4 ; GFX6-NEXT: v_mul_lo_u32 v5, v0, v2 ; GFX6-NEXT: v_mul_hi_u32 v7, v0, v2 -; GFX6-NEXT: v_mul_lo_u32 v6, v1, v4 -; GFX6-NEXT: v_mul_hi_u32 v4, v1, v4 +; GFX6-NEXT: v_mul_hi_u32 v6, v1, v4 +; GFX6-NEXT: v_mul_lo_u32 v4, v1, v4 +; GFX6-NEXT: v_mul_hi_u32 v8, v1, v2 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc -; GFX6-NEXT: v_mul_hi_u32 v7, v1, v2 ; GFX6-NEXT: v_mul_lo_u32 v2, v1, v2 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v5, v4, vcc -; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v7, vcc +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v5, v6, vcc +; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v8, vcc ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 @@ -9141,11 +9134,10 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX9-LABEL: sdiv_v2i64_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b64 s[2:3], 0x1000 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshl_b64 s[10:11], s[2:3], s10 -; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 +; GFX9-NEXT: s_lshl_b64 s[2:3], 0x1000, s8 +; GFX9-NEXT: s_lshl_b64 s[10:11], 0x1000, s10 ; GFX9-NEXT: s_ashr_i32 s8, s3, 31 ; GFX9-NEXT: s_add_u32 s2, s2, s8 ; GFX9-NEXT: s_mov_b32 s9, s8 @@ -9170,8 +9162,8 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX9-NEXT: s_mul_hi_u32 s18, s0, s15 ; GFX9-NEXT: s_mul_i32 s17, s1, s15 ; GFX9-NEXT: s_add_i32 s16, s18, s16 -; GFX9-NEXT: s_add_i32 s16, s16, s17 ; GFX9-NEXT: s_mul_i32 s19, s0, s15 +; GFX9-NEXT: s_add_i32 s16, s16, s17 ; GFX9-NEXT: s_mul_hi_u32 s17, s15, s16 ; GFX9-NEXT: s_mul_i32 s18, s15, s16 ; GFX9-NEXT: s_mul_hi_u32 s15, s15, s19 @@ -9691,12 +9683,11 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; ; GFX6-LABEL: srem_i64_pow2_shl_denom: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xd -; GFX6-NEXT: s_mov_b64 s[2:3], 0x1000 +; GFX6-NEXT: s_load_dword s2, s[0:1], 0xd ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 +; GFX6-NEXT: s_lshl_b64 s[2:3], 0x1000, s2 ; GFX6-NEXT: s_ashr_i32 s4, s3, 31 ; GFX6-NEXT: s_add_u32 s2, s2, s4 ; GFX6-NEXT: s_mov_b32 s5, s4 @@ -9728,17 +9719,16 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; GFX6-NEXT: v_mul_hi_u32 v3, v0, v4 ; GFX6-NEXT: v_mul_lo_u32 v5, v0, v2 -; GFX6-NEXT: v_mul_hi_u32 v6, v0, v2 -; GFX6-NEXT: v_mul_hi_u32 v7, v1, v2 -; GFX6-NEXT: v_mul_lo_u32 v2, v1, v2 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc +; GFX6-NEXT: v_mul_hi_u32 v7, v0, v2 ; GFX6-NEXT: v_mul_lo_u32 v6, v1, v4 ; GFX6-NEXT: 
v_mul_hi_u32 v4, v1, v4 -; GFX6-NEXT: s_xor_b64 s[12:13], s[2:3], s[10:11] +; GFX6-NEXT: v_mul_hi_u32 v8, v1, v2 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc +; GFX6-NEXT: v_mul_lo_u32 v2, v1, v2 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v5, v4, vcc -; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v7, vcc +; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v8, vcc ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 @@ -9746,6 +9736,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: v_mul_lo_u32 v2, s4, v1 ; GFX6-NEXT: v_mul_hi_u32 v3, s4, v0 ; GFX6-NEXT: v_mul_lo_u32 v4, s5, v0 +; GFX6-NEXT: s_xor_b64 s[12:13], s[2:3], s[10:11] ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GFX6-NEXT: v_mul_lo_u32 v3, s4, v0 @@ -9825,10 +9816,9 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: s_endpgm ; GFX9-LABEL: srem_i64_pow2_shl_denom: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s4, s[0:1], 0x34 -; GFX9-NEXT: s_mov_b64 s[2:3], 0x1000 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x34 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 +; GFX9-NEXT: s_lshl_b64 s[2:3], 0x1000, s2 ; GFX9-NEXT: s_ashr_i32 s4, s3, 31 ; GFX9-NEXT: s_add_u32 s2, s2, s4 ; GFX9-NEXT: s_mov_b32 s5, s4 @@ -9854,12 +9844,12 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX9-NEXT: s_mul_hi_u32 s12, s0, s3 ; GFX9-NEXT: s_mul_i32 s11, s1, s3 ; GFX9-NEXT: s_add_i32 s10, s12, s10 -; GFX9-NEXT: s_add_i32 s10, s10, s11 ; GFX9-NEXT: s_mul_i32 s13, s0, s3 +; GFX9-NEXT: s_add_i32 s10, s10, s11 +; GFX9-NEXT: s_mul_hi_u32 s12, s3, s13 ; GFX9-NEXT: s_mul_hi_u32 s11, s3, s10 -; GFX9-NEXT: s_mul_i32 s12, s3, s10 -; GFX9-NEXT: s_mul_hi_u32 s3, s3, s13 -; GFX9-NEXT: s_add_u32 s3, s3, s12 +; GFX9-NEXT: s_mul_i32 s3, s3, s10 +; GFX9-NEXT: s_add_u32 s3, s12, s3 ; GFX9-NEXT: s_addc_u32 s11, 0, s11 ; GFX9-NEXT: s_mul_hi_u32 s14, s2, s13 ; GFX9-NEXT: s_mul_i32 s13, s2, s13 @@ -10063,11 +10053,10 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX6-LABEL: srem_v2i64_pow2_shl_denom: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd -; GFX6-NEXT: s_mov_b64 s[2:3], 0x1000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s11, 0xf000 -; GFX6-NEXT: s_lshl_b64 s[16:17], s[2:3], s10 -; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 +; GFX6-NEXT: s_lshl_b64 s[2:3], 0x1000, s8 +; GFX6-NEXT: s_lshl_b64 s[16:17], 0x1000, s10 ; GFX6-NEXT: s_ashr_i32 s8, s3, 31 ; GFX6-NEXT: s_add_u32 s2, s2, s8 ; GFX6-NEXT: s_mov_b32 s9, s8 @@ -10101,15 +10090,15 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX6-NEXT: v_mul_hi_u32 v3, v0, v4 ; GFX6-NEXT: v_mul_lo_u32 v5, v0, v2 ; GFX6-NEXT: v_mul_hi_u32 v7, v0, v2 -; GFX6-NEXT: v_mul_lo_u32 v6, v1, v4 -; GFX6-NEXT: v_mul_hi_u32 v4, v1, v4 +; GFX6-NEXT: v_mul_hi_u32 v6, v1, v4 +; GFX6-NEXT: v_mul_lo_u32 v4, v1, v4 +; GFX6-NEXT: v_mul_hi_u32 v8, v1, v2 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc -; GFX6-NEXT: v_mul_hi_u32 v7, v1, v2 ; GFX6-NEXT: v_mul_lo_u32 v2, v1, v2 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v5, v4, vcc -; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v7, vcc +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GFX6-NEXT: 
v_addc_u32_e32 v3, vcc, v5, v6, vcc +; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v8, vcc ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 @@ -10316,11 +10305,10 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX9-LABEL: srem_v2i64_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b64 s[2:3], 0x1000 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshl_b64 s[10:11], s[2:3], s10 -; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 +; GFX9-NEXT: s_lshl_b64 s[2:3], 0x1000, s8 +; GFX9-NEXT: s_lshl_b64 s[10:11], 0x1000, s10 ; GFX9-NEXT: s_ashr_i32 s8, s3, 31 ; GFX9-NEXT: s_add_u32 s2, s2, s8 ; GFX9-NEXT: s_mov_b32 s9, s8 @@ -10345,8 +10333,8 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX9-NEXT: s_mul_hi_u32 s16, s0, s3 ; GFX9-NEXT: s_mul_i32 s15, s1, s3 ; GFX9-NEXT: s_add_i32 s14, s16, s14 -; GFX9-NEXT: s_add_i32 s14, s14, s15 ; GFX9-NEXT: s_mul_i32 s17, s0, s3 +; GFX9-NEXT: s_add_i32 s14, s14, s15 ; GFX9-NEXT: s_mul_hi_u32 s15, s3, s14 ; GFX9-NEXT: s_mul_i32 s16, s3, s14 ; GFX9-NEXT: s_mul_hi_u32 s3, s3, s17 diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll index abd9a4159f8ccd..60c31344452182 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -4416,9 +4416,9 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX7LESS-NEXT: s_cbranch_execz .LBB18_2 ; GFX7LESS-NEXT: ; %bb.1: -; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 5 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1] @@ -4452,8 +4452,8 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_cbranch_execz .LBB18_2 ; GFX8-NEXT: ; %bb.1: ; GFX8-NEXT: v_mov_b32_e32 v0, 5 -; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 +; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: s_mov_b32 m0, -1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1] @@ -4966,9 +4966,9 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX7LESS-NEXT: s_cbranch_execz .LBB20_2 ; GFX7LESS-NEXT: ; %bb.1: -; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 5 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1] @@ -5002,8 +5002,8 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_cbranch_execz .LBB20_2 ; GFX8-NEXT: ; %bb.1: ; GFX8-NEXT: v_mov_b32_e32 v0, 5 -; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 +; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: s_mov_b32 m0, -1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1] @@ -5516,9 +5516,9 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX7LESS-NEXT: s_cbranch_execz .LBB22_2 ; GFX7LESS-NEXT: ; %bb.1: -; 
GFX7LESS-NEXT: v_mov_b32_e32 v2, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 5 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1] @@ -5551,8 +5551,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_cbranch_execz .LBB22_2 ; GFX8-NEXT: ; %bb.1: ; GFX8-NEXT: v_mov_b32_e32 v0, 5 -; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 +; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: s_mov_b32 m0, -1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1] @@ -6061,9 +6061,9 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX7LESS-NEXT: s_cbranch_execz .LBB24_2 ; GFX7LESS-NEXT: ; %bb.1: -; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 5 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, 0 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1] @@ -6096,8 +6096,8 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_cbranch_execz .LBB24_2 ; GFX8-NEXT: ; %bb.1: ; GFX8-NEXT: v_mov_b32_e32 v0, 5 -; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 +; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: s_mov_b32 m0, -1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll index 6efbd6ce87385e..8082a0646d4a13 100644 --- a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll +++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll @@ -2533,8 +2533,7 @@ define i1 @test124(i32 %arg1, i64 %arg2) { ; GCN-LABEL: test124: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b64 s[0:1], 0x3e8 -; GCN-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[1:2] +; GCN-NEXT: v_cmp_gt_i64_e32 vcc_lo, 0x3e8, v[1:2] ; GCN-NEXT: v_cmp_gt_i32_e64 s0, 0x3e8, v0 ; GCN-NEXT: s_or_b32 s0, s0, vcc_lo ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 diff --git a/llvm/test/CodeGen/AMDGPU/commute-compares.ll b/llvm/test/CodeGen/AMDGPU/commute-compares.ll index c703a1dd7734dc..6997913f1ae161 100644 --- a/llvm/test/CodeGen/AMDGPU/commute-compares.ll +++ b/llvm/test/CodeGen/AMDGPU/commute-compares.ll @@ -250,8 +250,8 @@ define amdgpu_kernel void @commute_ule_63_i64(ptr addrspace(1) %out, ptr addrspa ; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm ; GCN-LABEL: {{^}}commute_ule_64_i64: -; GCN-DAG: s_movk_i32 s[[KLO:[0-9]+]], 0x41{{$}} -; GCN: v_cmp_gt_u64_e32 vcc, s[[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +; GCN: s_mov_b64 [[K:s\[[0-9:]+\]]], 0x41 +; GCN: v_cmp_gt_u64_e32 vcc, [[K]], v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @commute_ule_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid diff --git a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll index bd0c2b30eb5dea..53c9861b7a0516 100644 --- a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll +++ b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll @@ -310,8 +310,7 @@ define amdgpu_ps i64 @s_csh_64_1(i64 inreg %a, i64 inreg %b) { ; ; GISEL-LABEL: s_csh_64_1: ; GISEL: ; %bb.0: -; GISEL-NEXT: 
s_mov_b64 s[4:5], 0xff -; GISEL-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5] +; GISEL-NEXT: s_and_b64 s[2:3], s[2:3], 0xff ; GISEL-NEXT: s_lshl_b64 s[4:5], s[0:1], s2 ; GISEL-NEXT: s_lshr_b64 s[6:7], s[0:1], s2 ; GISEL-NEXT: s_ashr_i64 s[0:1], s[0:1], s2 diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll index 86d0df494bcacd..d9e0ddd3b90448 100644 --- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll +++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll @@ -27,29 +27,27 @@ define float @v_mul_42_f32(float %x) { } define double @v_mul_42_f64(double %x) { -; GFX9-LABEL: v_mul_42_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40450000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_42_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_mov_b32 s4, 0 +; GFX9-SDAG-NEXT: s_mov_b32 s5, 0x40450000 +; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_42_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40450000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_42_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40450000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_42_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40450000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_42_f64: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_mul_f64 v[0:1], 0x40450000, v[0:1] +; GFX1011-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 42.0 ret double %mul } @@ -726,9 +724,9 @@ define double @v_mul_0x1pn1031_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_0x1pn1031_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_movk_i32 s5, 0x800 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x800 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_0x1pn1031_f64: @@ -740,9 +738,7 @@ define double @v_mul_0x1pn1031_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_0x1pn1031_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_movk_i32 s5, 0x800 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x800, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_0x1pn1031_f64: @@ -754,9 +750,7 @@ define double @v_mul_0x1pn1031_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_0x1pn1031_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_movk_i32 s1, 0x800 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: 
v_mul_f64 v[0:1], 0x800, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 4.34584737989687770135e-311 ret double %mul @@ -774,9 +768,9 @@ define double @v_mul_0x1pn1022_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_0x1pn1022_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x100000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x100000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_0x1pn1022_f64: @@ -788,9 +782,7 @@ define double @v_mul_0x1pn1022_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_0x1pn1022_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x100000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x100000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_0x1pn1022_f64: @@ -802,9 +794,7 @@ define double @v_mul_0x1pn1022_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_0x1pn1022_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x100000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x100000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 2.22507385850720138309e-308 ret double %mul @@ -822,9 +812,9 @@ define double @v_mul_0x1pn1021_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_0x1pn1021_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x200000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x200000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_0x1pn1021_f64: @@ -836,9 +826,7 @@ define double @v_mul_0x1pn1021_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_0x1pn1021_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x200000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x200000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_0x1pn1021_f64: @@ -850,9 +838,7 @@ define double @v_mul_0x1pn1021_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_0x1pn1021_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x200000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x200000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 4.45014771701440276618e-308 ret double %mul @@ -870,9 +856,9 @@ define double @v_mul_0x1pn64_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_0x1pn64_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x3bf00000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 
0x3bf00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_0x1pn64_f64: @@ -884,9 +870,7 @@ define double @v_mul_0x1pn64_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_0x1pn64_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x3bf00000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x3bf00000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_0x1pn64_f64: @@ -898,9 +882,7 @@ define double @v_mul_0x1pn64_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_0x1pn64_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x3bf00000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x3bf00000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 5.42101086242752217004e-20 ret double %mul @@ -918,9 +900,9 @@ define double @v_mul_0x1pn17_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_0x1pn17_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x3ee00000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3ee00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_0x1pn17_f64: @@ -932,9 +914,7 @@ define double @v_mul_0x1pn17_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_0x1pn17_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x3ee00000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x3ee00000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_0x1pn17_f64: @@ -946,9 +926,7 @@ define double @v_mul_0x1pn17_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_0x1pn17_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x3ee00000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x3ee00000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 0.00000762939453125 ret double %mul @@ -965,9 +943,9 @@ define double @v_mul_0x1pn16_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_0x1pn16_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x3ef00000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3ef00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_0x1pn16_f64: @@ -979,9 +957,7 @@ define double @v_mul_0x1pn16_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_0x1pn16_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x3ef00000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x3ef00000, v[0:1] ; GFX10-GISEL-NEXT: 
s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_0x1pn16_f64: @@ -993,9 +969,7 @@ define double @v_mul_0x1pn16_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_0x1pn16_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x3ef00000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x3ef00000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 0.0000152587890625 ret double %mul @@ -1012,9 +986,9 @@ define double @v_mul_0x1pn15_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_0x1pn15_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0.5 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0.5 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_0x1pn15_f64: @@ -1026,9 +1000,7 @@ define double @v_mul_0x1pn15_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_0x1pn15_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0.5 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x3f000000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_0x1pn15_f64: @@ -1040,9 +1012,7 @@ define double @v_mul_0x1pn15_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_0x1pn15_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0.5 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x3f000000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 0.000030517578125 ret double %mul @@ -1058,9 +1028,9 @@ define double @v_mul_neg256_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_neg256_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0700000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0700000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_neg256_f64: @@ -1072,9 +1042,7 @@ define double @v_mul_neg256_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_neg256_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0700000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0700000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_neg256_f64: @@ -1086,9 +1054,7 @@ define double @v_mul_neg256_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_neg256_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0700000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0700000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -256.0 ret double %mul @@ -1104,9 +1070,9 @@ define double 
@v_mul_neg128_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_neg128_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0600000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0600000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_neg128_f64: @@ -1118,9 +1084,7 @@ define double @v_mul_neg128_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_neg128_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0600000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0600000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_neg128_f64: @@ -1132,9 +1096,7 @@ define double @v_mul_neg128_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_neg128_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0600000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0600000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -128.0 ret double %mul @@ -1150,9 +1112,9 @@ define double @v_mul_neg64_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_neg64_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0500000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0500000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_neg64_f64: @@ -1164,9 +1126,7 @@ define double @v_mul_neg64_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_neg64_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0500000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0500000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_neg64_f64: @@ -1178,9 +1138,7 @@ define double @v_mul_neg64_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_neg64_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0500000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0500000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -64.0 ret double %mul @@ -1196,9 +1154,9 @@ define double @v_mul_neg32_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_neg32_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0400000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0400000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_neg32_f64: @@ -1210,9 +1168,7 @@ define double @v_mul_neg32_f64(double %x) { ; GFX10-GISEL-LABEL: 
v_mul_neg32_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0400000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0400000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_neg32_f64: @@ -1224,9 +1180,7 @@ define double @v_mul_neg32_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_neg32_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0400000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0400000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -32.0 ret double %mul @@ -1242,9 +1196,9 @@ define double @v_mul_neg16_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_neg16_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0300000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0300000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_neg16_f64: @@ -1256,9 +1210,7 @@ define double @v_mul_neg16_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_neg16_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0300000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0300000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_neg16_f64: @@ -1270,9 +1222,7 @@ define double @v_mul_neg16_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_neg16_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0300000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0300000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -16.0 ret double %mul @@ -1288,9 +1238,9 @@ define double @v_mul_neg8_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_neg8_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0200000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0200000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_neg8_f64: @@ -1302,9 +1252,7 @@ define double @v_mul_neg8_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_neg8_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0200000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0200000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_neg8_f64: @@ -1316,9 +1264,7 @@ define double @v_mul_neg8_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_neg8_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; 
GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0200000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0200000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -8.0 ret double %mul @@ -1414,9 +1360,9 @@ define double @v_mul_neg_quarter_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_neg_quarter_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xbfd00000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xbfd00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_neg_quarter_f64: @@ -1428,9 +1374,7 @@ define double @v_mul_neg_quarter_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_neg_quarter_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xbfd00000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xbfd00000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_neg_quarter_f64: @@ -1442,9 +1386,7 @@ define double @v_mul_neg_quarter_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_neg_quarter_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xbfd00000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xbfd00000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -0.25 ret double %mul @@ -1460,9 +1402,9 @@ define double @v_mul_quarter_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_quarter_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x3fd00000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fd00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_quarter_f64: @@ -1474,9 +1416,7 @@ define double @v_mul_quarter_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_quarter_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x3fd00000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x3fd00000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_quarter_f64: @@ -1488,9 +1428,7 @@ define double @v_mul_quarter_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_quarter_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x3fd00000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x3fd00000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 0.25 ret double %mul @@ -1575,9 +1513,9 @@ define double @v_mul_8_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_8_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40200000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 
s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40200000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_8_f64: @@ -1589,9 +1527,7 @@ define double @v_mul_8_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_8_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40200000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40200000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_8_f64: @@ -1603,9 +1539,7 @@ define double @v_mul_8_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_8_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40200000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40200000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 8.0 ret double %mul @@ -1621,9 +1555,9 @@ define double @v_mul_16_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_16_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40300000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40300000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_16_f64: @@ -1635,9 +1569,7 @@ define double @v_mul_16_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_16_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40300000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40300000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_16_f64: @@ -1649,9 +1581,7 @@ define double @v_mul_16_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_16_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40300000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40300000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 16.0 ret double %mul @@ -1667,9 +1597,9 @@ define double @v_mul_32_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_32_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40400000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40400000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_32_f64: @@ -1681,9 +1611,7 @@ define double @v_mul_32_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_32_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40400000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: 
v_mul_32_f64: @@ -1695,9 +1623,7 @@ define double @v_mul_32_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_32_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40400000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 32.0 ret double %mul @@ -1713,9 +1639,9 @@ define double @v_mul_64_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_64_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40500000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40500000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_64_f64: @@ -1727,9 +1653,7 @@ define double @v_mul_64_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_64_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40500000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40500000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_64_f64: @@ -1741,9 +1665,7 @@ define double @v_mul_64_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_64_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40500000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40500000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 64.0 ret double %mul @@ -1759,9 +1681,9 @@ define double @v_mul_128_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_128_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40600000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40600000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_128_f64: @@ -1773,9 +1695,7 @@ define double @v_mul_128_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_128_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40600000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40600000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_128_f64: @@ -1787,9 +1707,7 @@ define double @v_mul_128_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_128_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40600000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40600000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 128.0 ret double %mul @@ -1805,9 +1723,9 @@ define double @v_mul_256_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_256_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40700000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40700000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_256_f64: @@ -1819,9 +1737,7 @@ define double @v_mul_256_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_256_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40700000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40700000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_256_f64: @@ -1833,9 +1749,7 @@ define double @v_mul_256_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_256_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40700000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40700000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 256.0 ret double %mul @@ -1852,9 +1766,9 @@ define double @v_mul_0x1p63_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_0x1p63_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x43e00000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x43e00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_0x1p63_f64: @@ -1866,9 +1780,7 @@ define double @v_mul_0x1p63_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_0x1p63_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x43e00000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x43e00000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_0x1p63_f64: @@ -1880,9 +1792,7 @@ define double @v_mul_0x1p63_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_0x1p63_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x43e00000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x43e00000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 9223372036854775808.0 ret double %mul @@ -1899,9 +1809,9 @@ define double @v_mul_0x1p64_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_0x1p64_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x43f00000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x43f00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_0x1p64_f64: @@ -1913,9 +1823,7 @@ define double @v_mul_0x1p64_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_0x1p64_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; 
GFX10-GISEL-NEXT: s_mov_b32 s5, 0x43f00000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x43f00000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_0x1p64_f64: @@ -1927,9 +1835,7 @@ define double @v_mul_0x1p64_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_0x1p64_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x43f00000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x43f00000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 18446744073709551616.0 ret double %mul @@ -1947,9 +1853,9 @@ define double @v_mul_0x1p65_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_0x1p65_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_brev_b32 s5, 34 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v3, 34 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_0x1p65_f64: @@ -1961,9 +1867,7 @@ define double @v_mul_0x1p65_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_0x1p65_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_brev_b32 s5, 34 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x44000000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_0x1p65_f64: @@ -1975,9 +1879,7 @@ define double @v_mul_0x1p65_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_0x1p65_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_brev_b32 s1, 34 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x44000000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 36893488147419103232.0 ret double %mul @@ -1994,10 +1896,8 @@ define amdgpu_ps <2 x i32> @s_mul_0x1p65_f64(double inreg %x, double inreg %y) { ; ; GFX9-GISEL-LABEL: s_mul_0x1p65_f64: ; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_mov_b32 s2, 0 -; GFX9-GISEL-NEXT: s_brev_b32 s3, 34 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v1, 34 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1] ; GFX9-GISEL-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-GISEL-NEXT: v_readfirstlane_b32 s1, v1 @@ -2012,9 +1912,7 @@ define amdgpu_ps <2 x i32> @s_mul_0x1p65_f64(double inreg %x, double inreg %y) { ; ; GFX10-GISEL-LABEL: s_mul_0x1p65_f64: ; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_mov_b32 s2, 0 -; GFX10-GISEL-NEXT: s_brev_b32 s3, 34 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], s[2:3] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x44000000, s[0:1] ; GFX10-GISEL-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-GISEL-NEXT: v_readfirstlane_b32 s1, v1 ; GFX10-GISEL-NEXT: ; return to shader part epilog @@ -2028,9 +1926,7 @@ define amdgpu_ps <2 x i32> @s_mul_0x1p65_f64(double inreg %x, double inreg %y) { ; ; GFX11-GISEL-LABEL: s_mul_0x1p65_f64: ; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_mov_b32 s2, 0 -; GFX11-GISEL-NEXT: s_brev_b32 s3, 34 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], s[2:3] +; 
GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x44000000, s[0:1] ; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v1 ; GFX11-GISEL-NEXT: ; return to shader part epilog @@ -2057,9 +1953,9 @@ define double @v_mul_0x1p128_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_0x1p128_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x47f00000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x47f00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_0x1p128_f64: @@ -2071,9 +1967,7 @@ define double @v_mul_0x1p128_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_0x1p128_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x47f00000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x47f00000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_0x1p128_f64: @@ -2085,9 +1979,7 @@ define double @v_mul_0x1p128_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_0x1p128_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x47f00000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x47f00000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 3.40282366920938463463e+38 ret double %mul @@ -2105,9 +1997,9 @@ define double @v_mul_0x1p1022_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_0x1p1022_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x7fd00000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fd00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_0x1p1022_f64: @@ -2119,9 +2011,7 @@ define double @v_mul_0x1p1022_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_0x1p1022_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x7fd00000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x7fd00000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_0x1p1022_f64: @@ -2133,9 +2023,7 @@ define double @v_mul_0x1p1022_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_0x1p1022_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x7fd00000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x7fd00000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 4.49423283715578976932e+307 ret double %mul @@ -2153,9 +2041,9 @@ define double @v_mul_0x1p1023_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_0x1p1023_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x7fe00000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; 
GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fe00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_0x1p1023_f64: @@ -2167,9 +2055,7 @@ define double @v_mul_0x1p1023_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_0x1p1023_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x7fe00000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x7fe00000, v[0:1] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_0x1p1023_f64: @@ -2181,9 +2067,7 @@ define double @v_mul_0x1p1023_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_0x1p1023_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x7fe00000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x7fe00000, v[0:1] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 8.98846567431157953865e+307 ret double %mul @@ -2191,29 +2075,27 @@ define double @v_mul_0x1p1023_f64(double %x) { ; Check that this doesn't interfere with fma formation define double @v_fma_mul_add_32_f64(double %x, double %y) { -; GFX9-LABEL: v_fma_mul_add_32_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40400000 -; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_fma_mul_add_32_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_mov_b32 s4, 0 +; GFX9-SDAG-NEXT: s_mov_b32 s5, 0x40400000 +; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_fma_mul_add_32_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40400000 -; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], v[2:3] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_fma_mul_add_32_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40400000 +; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[2:3] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_fma_mul_add_32_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40400000 -; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], s[0:1], v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_fma_mul_add_32_f64: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_fma_f64 v[0:1], 0x40400000, v[0:1], v[2:3] +; GFX1011-NEXT: s_setpc_b64 s[30:31] %mul = fmul contract double %x, 32.0 %fma = fadd contract double %mul, %y ret double %fma @@ -2229,23 +2111,33 @@ define <2 x double> @v_fma_mul_add_32_v2f64(<2 x double> %x, <2 x double> %y) { ; GFX9-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], v[6:7] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_fma_mul_add_32_v2f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40400000 -; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5],
v[4:5] -; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], v[6:7] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_fma_mul_add_32_v2f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], 0x40400000, v[0:1], v[4:5] +; GFX10-SDAG-NEXT: v_fma_f64 v[2:3], 0x40400000, v[2:3], v[6:7] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_fma_mul_add_32_v2f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40400000 -; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], s[0:1], v[4:5] -; GFX11-NEXT: v_fma_f64 v[2:3], v[2:3], s[0:1], v[6:7] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-GISEL-LABEL: v_fma_mul_add_32_v2f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], 0x40400000, v[4:5] +; GFX10-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], 0x40400000, v[6:7] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_fma_mul_add_32_v2f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], 0x40400000, v[0:1], v[4:5] +; GFX11-SDAG-NEXT: v_fma_f64 v[2:3], 0x40400000, v[2:3], v[6:7] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_fma_mul_add_32_v2f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], 0x40400000, v[4:5] +; GFX11-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], 0x40400000, v[6:7] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul contract <2 x double> %x, %fma = fadd contract <2 x double> %mul, %y ret <2 x double> %fma @@ -2319,9 +2211,9 @@ define double @v_mul_add_32_f64(double %x, double %y) { ; GFX9-GISEL-LABEL: v_mul_add_32_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40400000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40400000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] ; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2335,9 +2227,7 @@ define double @v_mul_add_32_f64(double %x, double %y) { ; GFX10-GISEL-LABEL: v_mul_add_32_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40400000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, v[0:1] ; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2351,9 +2241,7 @@ define double @v_mul_add_32_f64(double %x, double %y) { ; GFX11-GISEL-LABEL: v_mul_add_32_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40400000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, v[0:1] ; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 32.0 @@ -2456,58 +2344,54 @@ define double @v_mul_add_4_f64(double %x, double %y) { } define double @v_fma_mul_sub_32_f64(double %x, double %y) { -; GFX9-LABEL: v_fma_mul_sub_32_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40400000 -; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], -v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_fma_mul_sub_32_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_mov_b32 s4, 0 +; GFX9-SDAG-NEXT: s_mov_b32 s5, 0x40400000 +; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], -v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_fma_mul_sub_32_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40400000 -; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], -v[2:3] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_fma_mul_sub_32_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40400000 +; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], -v[2:3] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_fma_mul_sub_32_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40400000 -; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], s[0:1], -v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_fma_mul_sub_32_f64: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_fma_f64 v[0:1], 0x40400000, v[0:1], -v[2:3] +; GFX1011-NEXT: s_setpc_b64 s[30:31] %mul = fmul contract double %x, 32.0 %fma = fsub contract double %mul, %y ret double %fma } define double @v_fma_mul_add_neg32_f64(double %x, double %y) { -; GFX9-LABEL: v_fma_mul_add_neg32_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0xc0400000 -; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_fma_mul_add_neg32_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_mov_b32 s4, 0 +; GFX9-SDAG-NEXT: s_mov_b32 s5, 0xc0400000 +; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_fma_mul_add_neg32_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0xc0400000 -; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], v[2:3] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_fma_mul_add_neg32_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc0400000 +; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[2:3] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_fma_mul_add_neg32_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0xc0400000 -; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], s[0:1], v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_fma_mul_add_neg32_f64: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_fma_f64 v[0:1], 0xc0400000, v[0:1], v[2:3] +; GFX1011-NEXT: s_setpc_b64 s[30:31] %mul = fmul contract double %x, -32.0 %fma = fadd contract double %mul, %y ret double %fma @@ -2523,9 +2407,9 @@ define double 
@v_mul_fabs_32_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_fabs_32_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40400000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40400000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_fabs_32_f64: @@ -2537,9 +2421,7 @@ define double @v_mul_fabs_32_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_fabs_32_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40400000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, |v[0:1]| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_fabs_32_f64: @@ -2551,9 +2433,7 @@ define double @v_mul_fabs_32_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_fabs_32_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40400000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, |v[0:1]| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %x.fabs = call double @llvm.fabs.f64(double %x) %mul = fmul double %x.fabs, 32.0 @@ -2561,29 +2441,27 @@ define double @v_mul_fabs_32_f64(double %x) { } define double @v_mul_add_fma_fabs_32_f64(double %x, double %y) { -; GFX9-LABEL: v_mul_add_fma_fabs_32_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40400000 -; GFX9-NEXT: v_fma_f64 v[0:1], |v[0:1]|, s[4:5], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_add_fma_fabs_32_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_mov_b32 s4, 0 +; GFX9-SDAG-NEXT: s_mov_b32 s5, 0x40400000 +; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], |v[0:1]|, s[4:5], v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_add_fma_fabs_32_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40400000 -; GFX10-NEXT: v_fma_f64 v[0:1], |v[0:1]|, s[4:5], v[2:3] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_add_fma_fabs_32_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40400000 +; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], |v[0:1]|, v[4:5], v[2:3] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_add_fma_fabs_32_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40400000 -; GFX11-NEXT: v_fma_f64 v[0:1], |v[0:1]|, s[0:1], v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_add_fma_fabs_32_f64: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_fma_f64 v[0:1], 0x40400000, |v[0:1]|, v[2:3] +; GFX1011-NEXT: s_setpc_b64 s[30:31] %x.fabs = call double @llvm.fabs.f64(double %x) %mul = fmul contract double %x.fabs, 32.0 %fma = fadd contract double %mul, %y @@ -2617,10 +2495,8 @@ define <2 x double> @v_mul_16_v2f64(<2 x double> %x) { ; 
GFX10-GISEL-LABEL: v_mul_16_v2f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40300000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x40300000 +; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], 0x40300000 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_16_v2f64: @@ -2633,10 +2509,8 @@ define <2 x double> @v_mul_16_v2f64(<2 x double> %x) { ; GFX11-GISEL-LABEL: v_mul_16_v2f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40300000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x40300000 +; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], 0x40300000 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul <2 x double> %x, ret <2 x double> %mul @@ -2669,10 +2543,8 @@ define <2 x double> @v_mul_neg16_v2f64(<2 x double> %x) { ; GFX10-GISEL-LABEL: v_mul_neg16_v2f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0300000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0xc0300000 +; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], 0xc0300000 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_neg16_v2f64: @@ -2685,10 +2557,8 @@ define <2 x double> @v_mul_neg16_v2f64(<2 x double> %x) { ; GFX11-GISEL-LABEL: v_mul_neg16_v2f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0300000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0xc0300000 +; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], 0xc0300000 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul <2 x double> %x, ret <2 x double> %mul @@ -2721,10 +2591,8 @@ define <2 x double> @v_mul_fabs_16_v2f64(<2 x double> %x) { ; GFX10-GISEL-LABEL: v_mul_fabs_16_v2f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40300000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] -; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 0x40300000 +; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, 0x40300000 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_fabs_16_v2f64: @@ -2737,10 +2605,8 @@ define <2 x double> @v_mul_fabs_16_v2f64(<2 x double> %x) { ; GFX11-GISEL-LABEL: v_mul_fabs_16_v2f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40300000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] -; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 0x40300000 +; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, 0x40300000 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %x.fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x) %mul = fmul <2 x 
double> %x.fabs, @@ -2757,10 +2623,8 @@ define amdgpu_ps <2 x i32> @s_mul_32_f64(double inreg %x, double inreg %y) { ; ; GFX9-GISEL-LABEL: s_mul_32_f64: ; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_mov_b32 s2, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s3, 0x40400000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x40400000 ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1] ; GFX9-GISEL-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-GISEL-NEXT: v_readfirstlane_b32 s1, v1 @@ -2775,9 +2639,7 @@ define amdgpu_ps <2 x i32> @s_mul_32_f64(double inreg %x, double inreg %y) { ; ; GFX10-GISEL-LABEL: s_mul_32_f64: ; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_mov_b32 s2, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x40400000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], s[2:3] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, s[0:1] ; GFX10-GISEL-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-GISEL-NEXT: v_readfirstlane_b32 s1, v1 ; GFX10-GISEL-NEXT: ; return to shader part epilog @@ -2791,9 +2653,7 @@ define amdgpu_ps <2 x i32> @s_mul_32_f64(double inreg %x, double inreg %y) { ; ; GFX11-GISEL-LABEL: s_mul_32_f64: ; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_mov_b32 s2, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x40400000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], s[2:3] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40400000, s[0:1] ; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v1 ; GFX11-GISEL-NEXT: ; return to shader part epilog @@ -6885,9 +6745,9 @@ define double @v_constrained_fmul_32_f64(double %x, double %y) #0 { ; GFX9-GISEL-LABEL: v_constrained_fmul_32_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40400000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40400000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_constrained_fmul_32_f64: @@ -6899,9 +6759,7 @@ define double @v_constrained_fmul_32_f64(double %x, double %y) #0 { ; GFX10-GISEL-LABEL: v_constrained_fmul_32_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40400000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x40400000 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_constrained_fmul_32_f64: @@ -6913,9 +6771,7 @@ define double @v_constrained_fmul_32_f64(double %x, double %y) #0 { ; GFX11-GISEL-LABEL: v_constrained_fmul_32_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40400000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x40400000 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double 32.0, metadata !"round.dynamic", metadata !"fpexcept.strict") ret double %val @@ -6931,9 +6787,9 @@ define double @v_constrained_fmul_0x1p64_f64(double %x, double %y) #0 { ; GFX9-GISEL-LABEL: v_constrained_fmul_0x1p64_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; 
GFX9-GISEL-NEXT: s_mov_b32 s5, 0x43f00000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x43f00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_constrained_fmul_0x1p64_f64: @@ -6945,9 +6801,7 @@ define double @v_constrained_fmul_0x1p64_f64(double %x, double %y) #0 { ; GFX10-GISEL-LABEL: v_constrained_fmul_0x1p64_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x43f00000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x43f00000 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_constrained_fmul_0x1p64_f64: @@ -6959,9 +6813,7 @@ define double @v_constrained_fmul_0x1p64_f64(double %x, double %y) #0 { ; GFX11-GISEL-LABEL: v_constrained_fmul_0x1p64_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x43f00000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x43f00000 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double 18446744073709551616.0, metadata !"round.dynamic", metadata !"fpexcept.strict") ret double %val @@ -6988,9 +6840,9 @@ define double @v_mul_fabs_0x1pn1031_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_fabs_0x1pn1031_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_movk_i32 s5, 0x800 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x800 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_fabs_0x1pn1031_f64: @@ -7002,9 +6854,7 @@ define double @v_mul_fabs_0x1pn1031_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_fabs_0x1pn1031_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_movk_i32 s5, 0x800 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x800, |v[0:1]| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_fabs_0x1pn1031_f64: @@ -7016,9 +6866,7 @@ define double @v_mul_fabs_0x1pn1031_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_fabs_0x1pn1031_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_movk_i32 s1, 0x800 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x800, |v[0:1]| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.x = call double @llvm.fabs.f64(double %x) %mul = fmul double %fabs.x, 4.34584737989687770135e-311 @@ -7035,9 +6883,9 @@ define double @v_mul_fabs_neg256_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_fabs_neg256_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0700000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0700000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3] ; 
GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_fabs_neg256_f64: @@ -7049,9 +6897,7 @@ define double @v_mul_fabs_neg256_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_fabs_neg256_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0700000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0700000, |v[0:1]| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_fabs_neg256_f64: @@ -7063,9 +6909,7 @@ define double @v_mul_fabs_neg256_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_fabs_neg256_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0700000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0700000, |v[0:1]| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.x = call double @llvm.fabs.f64(double %x) %mul = fmul double %fabs.x, -256.0 @@ -7082,9 +6926,9 @@ define double @v_mul_fabs_neg8_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_fabs_neg8_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0200000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0200000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_fabs_neg8_f64: @@ -7096,9 +6940,7 @@ define double @v_mul_fabs_neg8_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_fabs_neg8_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0200000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0200000, |v[0:1]| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_fabs_neg8_f64: @@ -7110,9 +6952,7 @@ define double @v_mul_fabs_neg8_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_fabs_neg8_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0200000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0200000, |v[0:1]| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.x = call double @llvm.fabs.f64(double %x) %mul = fmul double %fabs.x, -8.0 @@ -7203,9 +7043,9 @@ define double @v_mul_fabs_negquarter_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_fabs_negquarter_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xbfd00000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xbfd00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_fabs_negquarter_f64: @@ -7217,9 +7057,7 @@ define double @v_mul_fabs_negquarter_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_fabs_negquarter_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xbfd00000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 
|v[0:1]|, s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xbfd00000, |v[0:1]| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_fabs_negquarter_f64: @@ -7231,9 +7069,7 @@ define double @v_mul_fabs_negquarter_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_fabs_negquarter_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xbfd00000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xbfd00000, |v[0:1]| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.x = call double @llvm.fabs.f64(double %x) %mul = fmul double %fabs.x, -0.25 @@ -7250,9 +7086,9 @@ define double @v_mul_fabs_quarter_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_fabs_quarter_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x3fd00000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fd00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_fabs_quarter_f64: @@ -7264,9 +7100,7 @@ define double @v_mul_fabs_quarter_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_fabs_quarter_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x3fd00000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x3fd00000, |v[0:1]| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_fabs_quarter_f64: @@ -7278,9 +7112,7 @@ define double @v_mul_fabs_quarter_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_fabs_quarter_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x3fd00000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x3fd00000, |v[0:1]| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.x = call double @llvm.fabs.f64(double %x) %mul = fmul double %fabs.x, 0.25 @@ -7371,9 +7203,9 @@ define double @v_mul_fabs_8_f64(double %x) { ; GFX9-GISEL-LABEL: v_mul_fabs_8_f64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40200000 -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x40200000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[2:3] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_fabs_8_f64: @@ -7385,9 +7217,7 @@ define double @v_mul_fabs_8_f64(double %x) { ; GFX10-GISEL-LABEL: v_mul_fabs_8_f64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40200000 -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40200000, |v[0:1]| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_fabs_8_f64: @@ -7399,9 +7229,7 @@ define double @v_mul_fabs_8_f64(double %x) { ; GFX11-GISEL-LABEL: v_mul_fabs_8_f64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 
0x40200000 -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40200000, |v[0:1]| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.x = call double @llvm.fabs.f64(double %x) %mul = fmul double %fabs.x, 8.0 diff --git a/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll index d4c830c55030d6..b60780db77378c 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll @@ -435,21 +435,17 @@ define double @fmul_pow_shl_cnt(i64 %cnt) nounwind { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v0, 1 -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40220000 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[1:2], v1 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[3:4], v0 ; GFX10-NEXT: v_ldexp_f64 v[0:1], v[1:2], 32 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-NEXT: v_mul_f64 v[0:1], 0x40220000, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: fmul_pow_shl_cnt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v0, 1 -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40220000 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cvt_f64_u32_e32 v[1:2], v1 ; GFX11-NEXT: v_cvt_f64_u32_e32 v[3:4], v0 @@ -457,7 +453,7 @@ define double @fmul_pow_shl_cnt(i64 %cnt) nounwind { ; GFX11-NEXT: v_ldexp_f64 v[0:1], v[1:2], 32 ; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-NEXT: v_mul_f64 v[0:1], 0x40220000, v[0:1] ; GFX11-NEXT: s_setpc_b64 s[30:31] %shl = shl nuw i64 1, %cnt %conv = uitofp i64 %shl to double @@ -529,21 +525,17 @@ define double @fmul_pow_shl_cnt2(i64 %cnt) nounwind { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v0, 2 -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0xc0220000 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[1:2], v1 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[3:4], v0 ; GFX10-NEXT: v_ldexp_f64 v[0:1], v[1:2], 32 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-NEXT: v_mul_f64 v[0:1], 0xc0220000, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: fmul_pow_shl_cnt2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v0, 2 -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0xc0220000 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cvt_f64_u32_e32 v[1:2], v1 ; GFX11-NEXT: v_cvt_f64_u32_e32 v[3:4], v0 @@ -551,7 +543,7 @@ define double @fmul_pow_shl_cnt2(i64 %cnt) nounwind { ; GFX11-NEXT: v_ldexp_f64 v[0:1], v[1:2], 32 ; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-NEXT: v_mul_f64 v[0:1], 0xc0220000, v[0:1] ; GFX11-NEXT: s_setpc_b64 s[30:31] %shl = shl nuw i64 2, %cnt %conv = uitofp i64 %shl to double @@ -721,8 +713,7 @@ define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v0, 8 -; GFX10-NEXT: s_mov_b64 s[4:5], 0x2000 -; GFX10-NEXT: 
v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[0:1] +; GFX10-NEXT: v_cmp_gt_u64_e32 vcc_lo, 0x2000, v[0:1] ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x2000, v0, vcc_lo ; GFX10-NEXT: v_ffbh_u32_e32 v2, v1 @@ -740,23 +731,22 @@ define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v0, 8 -; GFX11-NEXT: s_mov_b64 s[0:1], 0x2000 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, 0x2000, v[0:1] ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x2000, v0, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_clz_i32_u32_e32 v2, v1 -; GFX11-NEXT: v_min_u32_e32 v2, 32, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_min_u32_e32 v2, 32, v2 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_min_u32_e32 v0, 1, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX11-NEXT: v_sub_nc_u32_e32 v1, 32, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_f32_u32_e32 v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mul_f32_e32 v0, 0x41100000, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %shl8 = shl nuw i64 8, %cnt @@ -838,24 +828,20 @@ define double @fmul_pow_mul_max_pow2(i16 %cnt) nounwind { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_lshlrev_b16 v0, v0, 2 -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40080000 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-NEXT: v_mul_f64 v[0:1], 0x40080000, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: fmul_pow_mul_max_pow2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_lshlrev_b16 v0, v0, 2 -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40080000 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-NEXT: v_mul_f64 v[0:1], 0x40080000, v[0:1] ; GFX11-NEXT: s_setpc_b64 s[30:31] %shl2 = shl nuw i16 2, %cnt %shl1 = shl nuw i16 1, %cnt @@ -925,21 +911,17 @@ define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40220000 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[1:2], v1 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[3:4], v0 ; GFX10-NEXT: v_ldexp_f64 v[0:1], v[1:2], 32 ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-NEXT: v_mul_f64 v[0:1], 
0x40220000, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40220000 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cvt_f64_u32_e32 v[1:2], v1 ; GFX11-NEXT: v_cvt_f64_u32_e32 v[3:4], v0 @@ -947,7 +929,7 @@ define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind { ; GFX11-NEXT: v_ldexp_f64 v[0:1], v[1:2], 32 ; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-NEXT: v_mul_f64 v[0:1], 0x40220000, v[0:1] ; GFX11-NEXT: s_setpc_b64 s[30:31] %shl = shl nuw i64 %v, %cnt %conv = uitofp i64 %shl to double @@ -1206,8 +1188,6 @@ define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v0, 2 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], v2, 2 -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x402e0000 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[4:5], v1 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[6:7], v3 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 @@ -1216,8 +1196,8 @@ define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind { ; GFX10-NEXT: v_ldexp_f64 v[5:6], v[6:7], 32 ; GFX10-NEXT: v_add_f64 v[0:1], v[3:4], v[0:1] ; GFX10-NEXT: v_add_f64 v[2:3], v[5:6], v[8:9] -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] +; GFX10-NEXT: v_mul_f64 v[0:1], 0x402e0000, v[0:1] +; GFX10-NEXT: v_mul_f64 v[2:3], 0x402e0000, v[2:3] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: fmul_pow_shl_cnt_vec: @@ -1225,11 +1205,10 @@ define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v0, 2 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], v2, 2 -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x402e0000 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cvt_f64_u32_e32 v[4:5], v1 ; GFX11-NEXT: v_cvt_f64_u32_e32 v[6:7], v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 ; GFX11-NEXT: v_cvt_f64_u32_e32 v[8:9], v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) @@ -1239,8 +1218,8 @@ define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind { ; GFX11-NEXT: v_add_f64 v[0:1], v[3:4], v[0:1] ; GFX11-NEXT: v_add_f64 v[2:3], v[5:6], v[8:9] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: v_mul_f64 v[2:3], v[2:3], s[0:1] +; GFX11-NEXT: v_mul_f64 v[0:1], 0x402e0000, v[0:1] +; GFX11-NEXT: v_mul_f64 v[2:3], 0x402e0000, v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] %shl = shl nsw nuw <2 x i64> , %cnt %conv = uitofp <2 x i64> %shl to <2 x double> @@ -1435,20 +1414,21 @@ define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) nounwin ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_lshlrev_b64 v[0:1], v0, 2 -; VI-NEXT: v_lshlrev_b64 v[2:3], v2, 2 -; VI-NEXT: v_cvt_f64_u32_e32 v[4:5], v1 -; VI-NEXT: v_cvt_f64_u32_e32 v[6:7], v3 -; VI-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 +; VI-NEXT: s_mov_b32 s4, 
0 +; VI-NEXT: v_cvt_f64_u32_e32 v[3:4], v1 +; VI-NEXT: v_lshlrev_b64 v[1:2], v2, 2 ; VI-NEXT: s_mov_b32 s5, 0x402e0000 -; VI-NEXT: v_ldexp_f64 v[3:4], v[4:5], 32 -; VI-NEXT: v_ldexp_f64 v[5:6], v[6:7], 32 -; VI-NEXT: v_cvt_f64_u32_e32 v[7:8], v2 +; VI-NEXT: v_cvt_f64_u32_e32 v[5:6], v2 +; VI-NEXT: v_ldexp_f64 v[2:3], v[3:4], 32 +; VI-NEXT: v_ldexp_f64 v[4:5], v[5:6], 32 +; VI-NEXT: v_cvt_f64_u32_e32 v[6:7], v0 +; VI-NEXT: v_cvt_f64_u32_e32 v[0:1], v1 +; VI-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7] +; VI-NEXT: v_add_f64 v[4:5], v[4:5], v[0:1] +; VI-NEXT: v_mul_f64 v[0:1], v[2:3], s[4:5] ; VI-NEXT: s_mov_b32 s4, 0 -; VI-NEXT: v_add_f64 v[0:1], v[3:4], v[0:1] -; VI-NEXT: v_add_f64 v[2:3], v[5:6], v[7:8] -; VI-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] ; VI-NEXT: s_mov_b32 s5, 0x402c0000 -; VI-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] +; VI-NEXT: v_mul_f64 v[2:3], v[4:5], s[4:5] ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo: @@ -1456,8 +1436,6 @@ define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) nounwin ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v0, 2 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], v2, 2 -; GFX10-NEXT: s_mov_b32 s5, 0x402e0000 -; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[4:5], v1 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[6:7], v3 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 @@ -1466,9 +1444,8 @@ define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) nounwin ; GFX10-NEXT: v_ldexp_f64 v[5:6], v[6:7], 32 ; GFX10-NEXT: v_add_f64 v[0:1], v[3:4], v[0:1] ; GFX10-NEXT: v_add_f64 v[2:3], v[5:6], v[8:9] -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_mov_b32 s5, 0x402c0000 -; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] +; GFX10-NEXT: v_mul_f64 v[0:1], 0x402e0000, v[0:1] +; GFX10-NEXT: v_mul_f64 v[2:3], 0x402c0000, v[2:3] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo: @@ -1476,11 +1453,10 @@ define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) nounwin ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v0, 2 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], v2, 2 -; GFX11-NEXT: s_mov_b32 s1, 0x402e0000 -; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cvt_f64_u32_e32 v[4:5], v1 ; GFX11-NEXT: v_cvt_f64_u32_e32 v[6:7], v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 ; GFX11-NEXT: v_cvt_f64_u32_e32 v[8:9], v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) @@ -1489,11 +1465,9 @@ define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) nounwin ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_f64 v[0:1], v[3:4], v[0:1] ; GFX11-NEXT: v_add_f64 v[2:3], v[5:6], v[8:9] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_mov_b32 s1, 0x402c0000 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_mul_f64 v[2:3], v[2:3], s[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mul_f64 v[0:1], 0x402e0000, v[0:1] +; GFX11-NEXT: v_mul_f64 v[2:3], 0x402c0000, v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] %shl = shl nsw nuw <2 x i64> , %cnt %conv = uitofp <2 x i64> %shl to <2 x 
double> @@ -1580,8 +1554,6 @@ define <2 x double> @fmul_pow_shl_cnt_vec_non_splat2_todo(<2 x i64> %cnt) nounwi ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v0, 2 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], v2, 1 -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x402e0000 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[4:5], v1 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[6:7], v3 ; GFX10-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 @@ -1590,8 +1562,8 @@ define <2 x double> @fmul_pow_shl_cnt_vec_non_splat2_todo(<2 x i64> %cnt) nounwi ; GFX10-NEXT: v_ldexp_f64 v[5:6], v[6:7], 32 ; GFX10-NEXT: v_add_f64 v[0:1], v[3:4], v[0:1] ; GFX10-NEXT: v_add_f64 v[2:3], v[5:6], v[8:9] -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] +; GFX10-NEXT: v_mul_f64 v[0:1], 0x402e0000, v[0:1] +; GFX10-NEXT: v_mul_f64 v[2:3], 0x402e0000, v[2:3] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo: @@ -1599,11 +1571,10 @@ define <2 x double> @fmul_pow_shl_cnt_vec_non_splat2_todo(<2 x i64> %cnt) nounwi ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v0, 2 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], v2, 1 -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x402e0000 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cvt_f64_u32_e32 v[4:5], v1 ; GFX11-NEXT: v_cvt_f64_u32_e32 v[6:7], v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 ; GFX11-NEXT: v_cvt_f64_u32_e32 v[8:9], v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) @@ -1613,8 +1584,8 @@ define <2 x double> @fmul_pow_shl_cnt_vec_non_splat2_todo(<2 x i64> %cnt) nounwi ; GFX11-NEXT: v_add_f64 v[0:1], v[3:4], v[0:1] ; GFX11-NEXT: v_add_f64 v[2:3], v[5:6], v[8:9] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: v_mul_f64 v[2:3], v[2:3], s[0:1] +; GFX11-NEXT: v_mul_f64 v[0:1], 0x402e0000, v[0:1] +; GFX11-NEXT: v_mul_f64 v[2:3], 0x402e0000, v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] %shl = shl nsw nuw <2 x i64> , %cnt %conv = uitofp <2 x i64> %shl to <2 x double> diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll index 58b0a0f56918b0..fdc8c908ded549 100644 --- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll @@ -1561,12 +1561,12 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 { ; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat: ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1 +; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_endpgm @@ -1579,12 +1579,12 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 
0x24 -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 +; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] +; GFX90A-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm @@ -1592,12 +1592,12 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 { ; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent: ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] +; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc1 ; GFX940-NEXT: s_endpgm @@ -1637,12 +1637,12 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 { ; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_system: ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1 +; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_endpgm @@ -1838,12 +1838,12 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) { ; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] +; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc1 ; GFX940-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll index 055cfbdcc1ea39..3a0b8259d08496 100644 --- a/llvm/test/CodeGen/AMDGPU/fract-match.ll +++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll @@ -1821,14 +1821,14 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon ; GFX6-NEXT: v_add_f64 v[4:5], v[0:1], -v[4:5] ; GFX6-NEXT: s_mov_b32 s9, 0x3fefffff ; GFX6-NEXT: v_add_f64 v[6:7], v[0:1], -v[4:5] -; GFX6-NEXT: s_mov_b32 s6, 0 -; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], s[8:9] ; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], s[8:9] +; GFX6-NEXT: s_mov_b32 s8, 0 ; GFX6-NEXT: s_mov_b32 s9, 0x7ff00000 -; GFX6-NEXT: s_mov_b32 s8, s6 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc 
; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc ; GFX6-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[8:9] +; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s4, s6 ; GFX6-NEXT: s_mov_b32 s5, s6 @@ -1841,13 +1841,14 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon ; GFX7-LABEL: safe_math_fract_f64: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s4, 0 ; GFX7-NEXT: s_mov_b32 s5, 0x7ff00000 -; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: v_fract_f64_e32 v[4:5], v[0:1] ; GFX7-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5] ; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1] +; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: s_mov_b32 s5, s6 ; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc @@ -1872,10 +1873,8 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon ; GFX11-LABEL: safe_math_fract_f64: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x7ff00000 ; GFX11-NEXT: v_fract_f64_e32 v[4:5], v[0:1] -; GFX11-NEXT: v_cmp_neq_f64_e64 vcc_lo, |v[0:1]|, s[0:1] +; GFX11-NEXT: v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]| ; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5 diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll index 8bb8f6c464cd02..196a3705ac8187 100644 --- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=pitcairn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG %s -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=pitcairn -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=pitcairn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=pitcairn -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL %s define double @v_sqrt_f64(double %x) { ; SDAG-LABEL: v_sqrt_f64: @@ -37,11 +37,11 @@ define double @v_sqrt_f64(double %x) { ; GISEL-LABEL: v_sqrt_f64: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; 
GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -97,11 +97,11 @@ define double @v_sqrt_f64_fneg(double %x) { ; GISEL-LABEL: v_sqrt_f64_fneg: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -158,11 +158,11 @@ define double @v_sqrt_f64_fabs(double %x) { ; GISEL-LABEL: v_sqrt_f64_fabs: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -219,11 +219,11 @@ define double @v_sqrt_f64_fneg_fabs(double %x) { ; GISEL-LABEL: v_sqrt_f64_fneg_fabs: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, s[4:5] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -281,11 +281,11 @@ define double @v_sqrt_f64_ninf(double %x) { ; GISEL-LABEL: v_sqrt_f64_ninf: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -341,11 +341,11 @@ define double @v_sqrt_f64_no_infs_attribute(double %x) "no-infs-fp-math"="true" ; GISEL-LABEL: v_sqrt_f64_no_infs_attribute: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, 
v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -401,11 +401,11 @@ define double @v_sqrt_f64_nnan(double %x) { ; GISEL-LABEL: v_sqrt_f64_nnan: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -461,10 +461,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64(double inreg %x) { ; ; GISEL-LABEL: s_sqrt_f64: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_mov_b32 s2, 0 -; GISEL-NEXT: s_brev_b32 s3, 8 -; GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] ; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 ; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc @@ -532,10 +530,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) { ; ; GISEL-LABEL: s_sqrt_f64_ninf: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_mov_b32 s2, 0 -; GISEL-NEXT: s_brev_b32 s3, 8 -; GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] ; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 ; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc @@ -603,10 +599,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn(double inreg %x) { ; ; GISEL-LABEL: s_sqrt_f64_afn: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_mov_b32 s2, 0 -; GISEL-NEXT: s_brev_b32 s3, 8 -; GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] ; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 ; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc @@ -674,10 +668,8 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) { ; ; GISEL-LABEL: s_sqrt_f64_afn_nnan_ninf: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_mov_b32 s2, 0 -; GISEL-NEXT: s_brev_b32 s3, 8 -; GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] ; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 ; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc @@ -745,11 +737,11 @@ define double @v_sqrt_f64_nsz(double %x) { ; GISEL-LABEL: v_sqrt_f64_nsz: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -805,11 +797,11 @@ define double @v_sqrt_f64_nnan_ninf(double 
%x) { ; GISEL-LABEL: v_sqrt_f64_nnan_ninf: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -865,11 +857,11 @@ define double @v_sqrt_f64_nnan_ninf_nsz(double %x) { ; GISEL-LABEL: v_sqrt_f64_nnan_ninf_nsz: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -925,11 +917,11 @@ define double @v_sqrt_f64_afn(double %x) { ; GISEL-LABEL: v_sqrt_f64_afn: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -985,11 +977,11 @@ define double @v_sqrt_f64_afn_nsz(double %x) { ; GISEL-LABEL: v_sqrt_f64_afn_nsz: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1066,12 +1058,14 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) { ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_mov_b32 s4, 0 ; GISEL-NEXT: s_brev_b32 s5, 8 +; GISEL-NEXT: v_mov_b32_e32 v4, s4 +; GISEL-NEXT: v_mov_b32_e32 v5, s5 ; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5] -; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5 +; GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5] +; GISEL-NEXT: v_mov_b32_e32 v6, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc +; GISEL-NEXT: 
v_cndmask_b32_e64 v4, 0, v6, s[4:5] +; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7 ; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] ; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3] @@ -1142,11 +1136,11 @@ define double @v_sqrt_f64_afn_nnan(double %x) { ; GISEL-LABEL: v_sqrt_f64_afn_nnan: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1202,11 +1196,11 @@ define double @v_sqrt_f64_fabs_afn_ninf(double %x) { ; GISEL-LABEL: v_sqrt_f64_fabs_afn_ninf: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1263,11 +1257,11 @@ define double @v_sqrt_f64_afn_nnan_ninf(double %x) { ; GISEL-LABEL: v_sqrt_f64_afn_nnan_ninf: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1344,12 +1338,14 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) { ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_mov_b32 s4, 0 ; GISEL-NEXT: s_brev_b32 s5, 8 +; GISEL-NEXT: v_mov_b32_e32 v4, s4 +; GISEL-NEXT: v_mov_b32_e32 v5, s5 ; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5] -; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5 +; GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5] +; GISEL-NEXT: v_mov_b32_e32 v6, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5] +; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7 ; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] ; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3] @@ -1420,11 +1416,11 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) { ; GISEL-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1480,11 +1476,11 @@ define double @v_sqrt_f64__approx_func_fp_math(double %x) #2 { ; GISEL-LABEL: v_sqrt_f64__approx_func_fp_math: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1540,11 +1536,11 @@ define double @v_sqrt_f64__enough_unsafe_attrs(double %x) #3 { ; GISEL-LABEL: v_sqrt_f64__enough_unsafe_attrs: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1600,11 +1596,11 @@ define double @v_sqrt_f64__unsafe_attr(double %x) #4 { ; GISEL-LABEL: v_sqrt_f64__unsafe_attr: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s4, 0 -; GISEL-NEXT: s_brev_b32 s5, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -1681,12 +1677,14 @@ define <2 x double> @v_sqrt_v2f64(<2 x double> %x) { ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_mov_b32 s4, 0 ; GISEL-NEXT: s_brev_b32 s5, 8 +; GISEL-NEXT: v_mov_b32_e32 v4, s4 +; GISEL-NEXT: v_mov_b32_e32 v5, s5 ; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3] -; GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5] -; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5 +; GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5] +; GISEL-NEXT: v_mov_b32_e32 v6, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5] +; GISEL-NEXT: 
v_ldexp_f64 v[0:1], v[0:1], v7 ; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] ; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3] @@ -1795,17 +1793,19 @@ define <3 x double> @v_sqrt_v3f64(<3 x double> %x) { ; GISEL-LABEL: v_sqrt_v3f64: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s6, 0 -; GISEL-NEXT: s_brev_b32 s7, 8 -; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[0:1] -; GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[6:7], v[2:3] -; GISEL-NEXT: v_cmp_gt_f64_e64 s[6:7], s[6:7], v[4:5] -; GISEL-NEXT: v_mov_b32_e32 v6, 0x100 -; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc -; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, v6, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[6:7] -; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v7 +; GISEL-NEXT: s_mov_b32 s4, 0 +; GISEL-NEXT: s_brev_b32 s5, 8 +; GISEL-NEXT: v_mov_b32_e32 v6, s4 +; GISEL-NEXT: v_mov_b32_e32 v7, s5 +; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] +; GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7] +; GISEL-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[6:7] +; GISEL-NEXT: v_mov_b32_e32 v8, 0x100 +; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc +; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, v8, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v8, s[6:7] +; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v9 ; GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6 ; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1] ; GISEL-NEXT: v_rsq_f64_e32 v[8:9], v[2:3] @@ -1870,5 +1870,3 @@ attributes #1 = { convergent nounwind willreturn memory(none) } attributes #2 = { "approx-func-fp-math"="true" } attributes #3 = { "approx-func-fp-math"="true" "no-nans-fp-math"="true" "no-infs-fp-math"="true" } attributes #4 = { "unsafe-fp-math"="true" } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll index 08e06d4dd015a5..d85778bc0195fb 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll @@ -53,16 +53,16 @@ entry: define amdgpu_kernel void @atomic_add_i32_max_neg_offset(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: atomic_add_i32_max_neg_offset: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s0, s[0:1], 0xb -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, 0 +; SI-NEXT: s_load_dword s4, s[0:1], 0xb +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, 0 ; SI-NEXT: v_mov_b32_e32 v0, 0xfffff000 ; SI-NEXT: v_mov_b32_e32 v1, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, s0 +; SI-NEXT: v_mov_b32_e32 v2, s4 ; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_atomic_add v2, v[0:1], s[4:7], 0 addr64 +; SI-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 ; SI-NEXT: s_endpgm @@ -5403,12 +5403,12 @@ define amdgpu_kernel void @atomic_load_i32_negoffset(ptr addrspace(1) %in, ptr a ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: v_mov_b32_e32 v0, 0xfffffe00 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s4, s2 ; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: s_mov_b32 s2, 0 ; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: v_mov_b32_e32 v0, 0xfffffe00 ; SI-NEXT: v_mov_b32_e32 v1, -1 ; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc @@ -6148,12 +6148,12 @@ define amdgpu_kernel void @atomic_load_i8_negoffset(ptr addrspace(1) %in, ptr ad ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: v_mov_b32_e32 v0, 0xfffffe00 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s4, s2 ; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: s_mov_b32 s2, 0 ; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: v_mov_b32_e32 v0, 0xfffffe00 ; SI-NEXT: v_mov_b32_e32 v1, -1 ; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64 glc @@ -6339,12 +6339,12 @@ define amdgpu_kernel void @atomic_load_i16_negoffset(ptr addrspace(1) %in, ptr a ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: v_mov_b32_e32 v0, 0xfffffe00 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s4, s2 ; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: s_mov_b32 s2, 0 ; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: v_mov_b32_e32 v0, 0xfffffe00 ; SI-NEXT: v_mov_b32_e32 v1, -1 ; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 glc @@ -6606,16 +6606,16 @@ entry: define amdgpu_kernel void @atomic_inc_i32_max_neg_offset(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: atomic_inc_i32_max_neg_offset: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s0, s[0:1], 0xb -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, 0 +; SI-NEXT: s_load_dword s4, s[0:1], 0xb +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, 0 ; SI-NEXT: v_mov_b32_e32 v0, 0xfffff000 ; SI-NEXT: v_mov_b32_e32 v1, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, s0 +; SI-NEXT: v_mov_b32_e32 v2, s4 ; SI-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_atomic_inc v2, v[0:1], s[4:7], 0 addr64 +; SI-NEXT: buffer_atomic_inc v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 ; SI-NEXT: s_endpgm @@ -7001,16 +7001,16 @@ entry: define amdgpu_kernel void @atomic_dec_i32_max_neg_offset(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: atomic_dec_i32_max_neg_offset: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s0, s[0:1], 0xb -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, 0 +; SI-NEXT: s_load_dword s4, s[0:1], 0xb +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, 0 ; SI-NEXT: v_mov_b32_e32 v0, 0xfffff000 ; SI-NEXT: v_mov_b32_e32 v1, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, s0 +; SI-NEXT: v_mov_b32_e32 v2, s4 ; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_atomic_dec v2, v[0:1], s[4:7], 0 addr64 +; SI-NEXT: buffer_atomic_dec v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 ; SI-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll index 429bdd805ec5e1..4cbd5e84871cc7 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll @@ -1096,8 +1096,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; GFX7LESS-NEXT: s_cbranch_execz .LBB2_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[2:3] -; GFX7LESS-NEXT: s_mov_b32 s7, 0x43300000 ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s7, 0x43300000 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0xc3300000 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 @@ -1141,27 +1141,28 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-NEXT: s_cbranch_execz .LBB2_3 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9-NEXT: s_mov_b32 s3, 0x43300000 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9-NEXT: v_mov_b32_e32 v1, 0xc3300000 -; GFX9-NEXT: v_add_f64 v[1:2], s[2:3], v[0:1] +; GFX9-NEXT: s_mov_b32 s3, 0x43300000 +; GFX9-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_mov_b64 s[2:3], 0 +; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX9-NEXT: v_cvt_f32_f64_e32 v1, v[1:2] +; GFX9-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: v_mul_f32_e32 v3, 4.0, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_mul_f32_e32 v2, 4.0, v0 ; GFX9-NEXT: .LBB2_2: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-NEXT: v_add_f32_e32 v1, v2, v3 -; GFX9-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX9-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX9-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX9-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_cbranch_execnz .LBB2_2 ; GFX9-NEXT: .LBB2_3: @@ -1169,25 +1170,23 @@ define amdgpu_kernel void 
@global_atomic_fadd_uni_address_uni_value_one_as_scope ; ; GFX1064-LABEL: global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 ; GFX1064-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; GFX1064-NEXT: s_mov_b32 s10, -1 ; GFX1064-NEXT: s_mov_b32 s11, 0x31e16000 ; GFX1064-NEXT: s_add_u32 s8, s8, s3 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX1064-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-NEXT: s_addc_u32 s9, s9, 0 -; GFX1064-NEXT: s_mov_b32 s2, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1064-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB2_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1064-NEXT: s_mov_b32 s3, 0xc3300000 +; GFX1064-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064-NEXT: s_mov_b32 s3, 0x43300000 ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1064-NEXT: v_add_f64 v[0:1], 0xc3300000, s[2:3] ; GFX1064-NEXT: v_mov_b32_e32 v3, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -1226,9 +1225,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; GFX1032-NEXT: ; %bb.1: ; GFX1032-NEXT: s_bcnt1_i32_b32 s4, s3 ; GFX1032-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1032-NEXT: s_mov_b32 s3, 0xc3300000 ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1032-NEXT: v_add_f64 v[0:1], 0xc3300000, s[4:5] ; GFX1032-NEXT: v_mov_b32_e32 v3, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dword s3, s[0:1], 0x0 @@ -1255,8 +1253,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; GFX1164-NEXT: v_mov_b32_e32 v0, 0x43300000 ; GFX1164-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX1164-NEXT: s_mov_b32 s2, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], exec +; GFX1164-NEXT: s_mov_b64 s[2:3], exec ; GFX1164-NEXT: s_clause 0x1 ; GFX1164-NEXT: scratch_store_b32 off, v0, off offset:12 ; GFX1164-NEXT: scratch_store_b32 off, v1, off offset:8 @@ -1266,10 +1263,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1164-NEXT: s_cbranch_execz .LBB2_3 ; GFX1164-NEXT: ; %bb.1: -; GFX1164-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1164-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -1309,10 +1305,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1132-NEXT: s_cbranch_execz .LBB2_3 ; GFX1132-NEXT: ; %bb.1: -; GFX1132-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v3, 0 ; 
GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0 @@ -1350,27 +1345,28 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB2_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9-DPP-NEXT: s_mov_b32 s3, 0x43300000 ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0xc3300000 -; GFX9-DPP-NEXT: v_add_f64 v[1:2], s[2:3], v[0:1] +; GFX9-DPP-NEXT: s_mov_b32 s3, 0x43300000 +; GFX9-DPP-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] ; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX9-DPP-NEXT: v_cvt_f32_f64_e32 v1, v[1:2] +; GFX9-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-DPP-NEXT: v_mul_f32_e32 v3, 4.0, v1 +; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-DPP-NEXT: v_mul_f32_e32 v2, 4.0, v0 ; GFX9-DPP-NEXT: .LBB2_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-DPP-NEXT: v_add_f32_e32 v1, v2, v3 -; GFX9-DPP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX9-DPP-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB2_2 ; GFX9-DPP-NEXT: .LBB2_3: @@ -1378,25 +1374,23 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; ; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-DPP-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; GFX1064-DPP-NEXT: s_mov_b32 s10, -1 ; GFX1064-DPP-NEXT: s_mov_b32 s11, 0x31e16000 ; GFX1064-DPP-NEXT: s_add_u32 s8, s8, s3 -; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-DPP-NEXT: s_addc_u32 s9, s9, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s2, 0 +; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB2_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064-DPP-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1064-DPP-NEXT: s_mov_b32 s3, 0xc3300000 +; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064-DPP-NEXT: s_mov_b32 s3, 0x43300000 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-DPP-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1064-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, s[2:3] ; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -1435,9 +1429,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; 
GFX1032-DPP-NEXT: ; %bb.1: ; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s4, s3 ; GFX1032-DPP-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1032-DPP-NEXT: s_mov_b32 s3, 0xc3300000 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-DPP-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1032-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, s[4:5] ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dword s3, s[0:1], 0x0 @@ -1464,8 +1457,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX1164-DPP-NEXT: s_mov_b32 s2, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], exec +; GFX1164-DPP-NEXT: s_mov_b64 s[2:3], exec ; GFX1164-DPP-NEXT: s_clause 0x1 ; GFX1164-DPP-NEXT: scratch_store_b32 off, v0, off offset:12 ; GFX1164-DPP-NEXT: scratch_store_b32 off, v1, off offset:8 @@ -1475,10 +1467,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB2_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX1164-DPP-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1164-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -1518,10 +1509,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB2_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0 @@ -2356,8 +2346,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; GFX7LESS-NEXT: s_cbranch_execz .LBB4_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[2:3] -; GFX7LESS-NEXT: s_mov_b32 s7, 0x43300000 ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s7, 0x43300000 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0xc3300000 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 @@ -2401,27 +2391,28 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-NEXT: s_cbranch_execz .LBB4_3 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9-NEXT: s_mov_b32 s3, 0x43300000 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9-NEXT: v_mov_b32_e32 v1, 0xc3300000 -; GFX9-NEXT: v_add_f64 v[1:2], s[2:3], v[0:1] +; GFX9-NEXT: s_mov_b32 s3, 0x43300000 +; GFX9-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_mov_b64 s[2:3], 0 +; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX9-NEXT: v_cvt_f32_f64_e32 v1, v[1:2] +; GFX9-NEXT: 
v_cvt_f32_f64_e32 v0, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: v_mul_f32_e32 v3, 4.0, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_mul_f32_e32 v2, 4.0, v0 ; GFX9-NEXT: .LBB4_2: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-NEXT: v_add_f32_e32 v1, v2, v3 -; GFX9-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX9-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX9-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX9-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_cbranch_execnz .LBB4_2 ; GFX9-NEXT: .LBB4_3: @@ -2429,25 +2420,23 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fadd_uni_address_uni_value_agent_scope_strictfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 ; GFX1064-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; GFX1064-NEXT: s_mov_b32 s10, -1 ; GFX1064-NEXT: s_mov_b32 s11, 0x31e16000 ; GFX1064-NEXT: s_add_u32 s8, s8, s3 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX1064-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-NEXT: s_addc_u32 s9, s9, 0 -; GFX1064-NEXT: s_mov_b32 s2, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1064-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB4_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1064-NEXT: s_mov_b32 s3, 0xc3300000 +; GFX1064-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064-NEXT: s_mov_b32 s3, 0x43300000 ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1064-NEXT: v_add_f64 v[0:1], 0xc3300000, s[2:3] ; GFX1064-NEXT: v_mov_b32_e32 v3, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -2486,9 +2475,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; GFX1032-NEXT: ; %bb.1: ; GFX1032-NEXT: s_bcnt1_i32_b32 s4, s3 ; GFX1032-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1032-NEXT: s_mov_b32 s3, 0xc3300000 ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1032-NEXT: v_add_f64 v[0:1], 0xc3300000, s[4:5] ; GFX1032-NEXT: v_mov_b32_e32 v3, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dword s3, s[0:1], 0x0 @@ -2515,8 +2503,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; GFX1164-NEXT: v_mov_b32_e32 v0, 0x43300000 ; GFX1164-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX1164-NEXT: s_mov_b32 s2, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], exec +; GFX1164-NEXT: s_mov_b64 s[2:3], exec ; GFX1164-NEXT: s_clause 0x1 ; GFX1164-NEXT: scratch_store_b32 off, v0, off offset:12 ; GFX1164-NEXT: scratch_store_b32 off, v1, off offset:8 @@ -2526,10 +2513,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1164-NEXT: s_cbranch_execz .LBB4_3 ; GFX1164-NEXT: ; %bb.1: -; GFX1164-NEXT: s_mov_b32 s3, 0xc3300000 
-; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1164-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -2569,10 +2555,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1132-NEXT: s_cbranch_execz .LBB4_3 ; GFX1132-NEXT: ; %bb.1: -; GFX1132-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v3, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0 @@ -2610,27 +2595,28 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB4_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9-DPP-NEXT: s_mov_b32 s3, 0x43300000 ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0xc3300000 -; GFX9-DPP-NEXT: v_add_f64 v[1:2], s[2:3], v[0:1] +; GFX9-DPP-NEXT: s_mov_b32 s3, 0x43300000 +; GFX9-DPP-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] ; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX9-DPP-NEXT: v_cvt_f32_f64_e32 v1, v[1:2] +; GFX9-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-DPP-NEXT: v_mul_f32_e32 v3, 4.0, v1 +; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-DPP-NEXT: v_mul_f32_e32 v2, 4.0, v0 ; GFX9-DPP-NEXT: .LBB4_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-DPP-NEXT: v_add_f32_e32 v1, v2, v3 -; GFX9-DPP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX9-DPP-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB4_2 ; GFX9-DPP-NEXT: .LBB4_3: @@ -2638,25 +2624,23 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_uni_value_agent_scope_strictfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-DPP-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; GFX1064-DPP-NEXT: s_mov_b32 s10, -1 ; GFX1064-DPP-NEXT: s_mov_b32 s11, 0x31e16000 ; GFX1064-DPP-NEXT: s_add_u32 s8, s8, s3 -; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-DPP-NEXT: s_addc_u32 s9, s9, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s2, 0 +; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; 
GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB4_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064-DPP-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1064-DPP-NEXT: s_mov_b32 s3, 0xc3300000 +; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064-DPP-NEXT: s_mov_b32 s3, 0x43300000 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-DPP-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1064-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, s[2:3] ; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -2695,9 +2679,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; GFX1032-DPP-NEXT: ; %bb.1: ; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s4, s3 ; GFX1032-DPP-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1032-DPP-NEXT: s_mov_b32 s3, 0xc3300000 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-DPP-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1032-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, s[4:5] ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dword s3, s[0:1], 0x0 @@ -2724,8 +2707,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX1164-DPP-NEXT: s_mov_b32 s2, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], exec +; GFX1164-DPP-NEXT: s_mov_b64 s[2:3], exec ; GFX1164-DPP-NEXT: s_clause 0x1 ; GFX1164-DPP-NEXT: scratch_store_b32 off, v0, off offset:12 ; GFX1164-DPP-NEXT: scratch_store_b32 off, v1, off offset:8 @@ -2735,10 +2717,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB4_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX1164-DPP-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1164-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -2778,10 +2759,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB4_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0 @@ -4320,8 +4300,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop ; GFX7LESS-NEXT: s_cbranch_execz .LBB7_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[2:3] -; GFX7LESS-NEXT: s_mov_b32 s7, 0x43300000 ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s7, 0x43300000 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 0 ; GFX7LESS-NEXT: 
v_mov_b32_e32 v1, 0xc3300000 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 @@ -4365,27 +4345,28 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-NEXT: s_cbranch_execz .LBB7_3 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9-NEXT: s_mov_b32 s3, 0x43300000 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9-NEXT: v_mov_b32_e32 v1, 0xc3300000 -; GFX9-NEXT: v_add_f64 v[1:2], s[2:3], v[0:1] +; GFX9-NEXT: s_mov_b32 s3, 0x43300000 +; GFX9-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_mov_b64 s[2:3], 0 +; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX9-NEXT: v_cvt_f32_f64_e32 v1, v[1:2] +; GFX9-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: v_mul_f32_e32 v3, 4.0, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_mul_f32_e32 v2, 4.0, v0 ; GFX9-NEXT: .LBB7_2: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-NEXT: v_add_f32_e32 v1, v2, v3 -; GFX9-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX9-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX9-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX9-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_cbranch_execnz .LBB7_2 ; GFX9-NEXT: .LBB7_3: @@ -4393,25 +4374,23 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop ; ; GFX1064-LABEL: global_atomic_fadd_uni_address_uni_value_defalut_scope_strictfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 ; GFX1064-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; GFX1064-NEXT: s_mov_b32 s10, -1 ; GFX1064-NEXT: s_mov_b32 s11, 0x31e16000 ; GFX1064-NEXT: s_add_u32 s8, s8, s3 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX1064-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-NEXT: s_addc_u32 s9, s9, 0 -; GFX1064-NEXT: s_mov_b32 s2, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1064-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB7_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1064-NEXT: s_mov_b32 s3, 0xc3300000 +; GFX1064-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064-NEXT: s_mov_b32 s3, 0x43300000 ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1064-NEXT: v_add_f64 v[0:1], 0xc3300000, s[2:3] ; GFX1064-NEXT: v_mov_b32_e32 v3, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -4450,9 +4429,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop ; GFX1032-NEXT: ; %bb.1: ; GFX1032-NEXT: s_bcnt1_i32_b32 s4, s3 ; GFX1032-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1032-NEXT: s_mov_b32 s3, 0xc3300000 ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1032-NEXT: v_add_f64 v[0:1], 0xc3300000, s[4:5] ; 
GFX1032-NEXT: v_mov_b32_e32 v3, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dword s3, s[0:1], 0x0 @@ -4479,8 +4457,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop ; GFX1164-NEXT: v_mov_b32_e32 v0, 0x43300000 ; GFX1164-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX1164-NEXT: s_mov_b32 s2, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], exec +; GFX1164-NEXT: s_mov_b64 s[2:3], exec ; GFX1164-NEXT: s_clause 0x1 ; GFX1164-NEXT: scratch_store_b32 off, v0, off offset:12 ; GFX1164-NEXT: scratch_store_b32 off, v1, off offset:8 @@ -4490,10 +4467,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1164-NEXT: s_cbranch_execz .LBB7_3 ; GFX1164-NEXT: ; %bb.1: -; GFX1164-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1164-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -4533,10 +4509,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1132-NEXT: s_cbranch_execz .LBB7_3 ; GFX1132-NEXT: ; %bb.1: -; GFX1132-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v3, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0 @@ -4574,27 +4549,28 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9-DPP-NEXT: s_mov_b32 s3, 0x43300000 ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0xc3300000 -; GFX9-DPP-NEXT: v_add_f64 v[1:2], s[2:3], v[0:1] +; GFX9-DPP-NEXT: s_mov_b32 s3, 0x43300000 +; GFX9-DPP-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] ; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX9-DPP-NEXT: v_cvt_f32_f64_e32 v1, v[1:2] +; GFX9-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-DPP-NEXT: v_mul_f32_e32 v3, 4.0, v1 +; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-DPP-NEXT: v_mul_f32_e32 v2, 4.0, v0 ; GFX9-DPP-NEXT: .LBB7_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-DPP-NEXT: v_add_f32_e32 v1, v2, v3 -; GFX9-DPP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX9-DPP-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX9-DPP-NEXT: 
s_cbranch_execnz .LBB7_2 ; GFX9-DPP-NEXT: .LBB7_3: @@ -4602,25 +4578,23 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop ; ; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_uni_value_defalut_scope_strictfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-DPP-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; GFX1064-DPP-NEXT: s_mov_b32 s10, -1 ; GFX1064-DPP-NEXT: s_mov_b32 s11, 0x31e16000 ; GFX1064-DPP-NEXT: s_add_u32 s8, s8, s3 -; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-DPP-NEXT: s_addc_u32 s9, s9, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s2, 0 +; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064-DPP-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1064-DPP-NEXT: s_mov_b32 s3, 0xc3300000 +; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064-DPP-NEXT: s_mov_b32 s3, 0x43300000 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-DPP-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1064-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, s[2:3] ; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -4659,9 +4633,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop ; GFX1032-DPP-NEXT: ; %bb.1: ; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s4, s3 ; GFX1032-DPP-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1032-DPP-NEXT: s_mov_b32 s3, 0xc3300000 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-DPP-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1032-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, s[4:5] ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dword s3, s[0:1], 0x0 @@ -4688,8 +4661,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX1164-DPP-NEXT: s_mov_b32 s2, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], exec +; GFX1164-DPP-NEXT: s_mov_b64 s[2:3], exec ; GFX1164-DPP-NEXT: s_clause 0x1 ; GFX1164-DPP-NEXT: scratch_store_b32 off, v0, off offset:12 ; GFX1164-DPP-NEXT: scratch_store_b32 off, v1, off offset:8 @@ -4699,10 +4671,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX1164-DPP-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1164-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -4742,10 +4713,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1132-DPP-NEXT: s_cbranch_execz 
.LBB7_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll index f05a420a1b0a26..4a00d7bc71bca8 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll @@ -1192,8 +1192,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; GFX7LESS-NEXT: s_cbranch_execz .LBB2_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[2:3] -; GFX7LESS-NEXT: s_mov_b32 s7, 0x43300000 ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s7, 0x43300000 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0xc3300000 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 @@ -1237,27 +1237,28 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-NEXT: s_cbranch_execz .LBB2_3 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9-NEXT: s_mov_b32 s3, 0x43300000 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9-NEXT: v_mov_b32_e32 v1, 0xc3300000 -; GFX9-NEXT: v_add_f64 v[1:2], s[2:3], v[0:1] +; GFX9-NEXT: s_mov_b32 s3, 0x43300000 +; GFX9-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_mov_b64 s[2:3], 0 +; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX9-NEXT: v_cvt_f32_f64_e32 v1, v[1:2] +; GFX9-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: v_mul_f32_e32 v3, 4.0, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_mul_f32_e32 v2, 4.0, v0 ; GFX9-NEXT: .LBB2_2: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-NEXT: v_sub_f32_e32 v1, v2, v3 -; GFX9-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX9-NEXT: v_sub_f32_e32 v0, v1, v2 +; GFX9-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX9-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_cbranch_execnz .LBB2_2 ; GFX9-NEXT: .LBB2_3: @@ -1265,25 +1266,23 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; ; GFX1064-LABEL: global_atomic_fsub_uni_address_uni_value_one_as_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 ; GFX1064-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; GFX1064-NEXT: s_mov_b32 s10, -1 ; GFX1064-NEXT: s_mov_b32 s11, 0x31e16000 ; GFX1064-NEXT: s_add_u32 s8, s8, s3 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX1064-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-NEXT: s_addc_u32 s9, s9, 0 -; GFX1064-NEXT: s_mov_b32 s2, 0 +; GFX1064-NEXT: 
v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1064-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB2_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1064-NEXT: s_mov_b32 s3, 0xc3300000 +; GFX1064-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064-NEXT: s_mov_b32 s3, 0x43300000 ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1064-NEXT: v_add_f64 v[0:1], 0xc3300000, s[2:3] ; GFX1064-NEXT: v_mov_b32_e32 v3, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -1322,9 +1321,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; GFX1032-NEXT: ; %bb.1: ; GFX1032-NEXT: s_bcnt1_i32_b32 s4, s3 ; GFX1032-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1032-NEXT: s_mov_b32 s3, 0xc3300000 ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1032-NEXT: v_add_f64 v[0:1], 0xc3300000, s[4:5] ; GFX1032-NEXT: v_mov_b32_e32 v3, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dword s3, s[0:1], 0x0 @@ -1351,8 +1349,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; GFX1164-NEXT: v_mov_b32_e32 v0, 0x43300000 ; GFX1164-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX1164-NEXT: s_mov_b32 s2, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], exec +; GFX1164-NEXT: s_mov_b64 s[2:3], exec ; GFX1164-NEXT: s_clause 0x1 ; GFX1164-NEXT: scratch_store_b32 off, v0, off offset:12 ; GFX1164-NEXT: scratch_store_b32 off, v1, off offset:8 @@ -1362,10 +1359,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1164-NEXT: s_cbranch_execz .LBB2_3 ; GFX1164-NEXT: ; %bb.1: -; GFX1164-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1164-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -1405,10 +1401,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1132-NEXT: s_cbranch_execz .LBB2_3 ; GFX1132-NEXT: ; %bb.1: -; GFX1132-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v3, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0 @@ -1446,27 +1441,28 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB2_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9-DPP-NEXT: s_mov_b32 s3, 0x43300000 ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0xc3300000 -; GFX9-DPP-NEXT: v_add_f64 v[1:2], s[2:3], v[0:1] +; GFX9-DPP-NEXT: s_mov_b32 s3, 0x43300000 +; GFX9-DPP-NEXT: v_add_f64 
v[0:1], s[2:3], v[0:1] ; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX9-DPP-NEXT: v_cvt_f32_f64_e32 v1, v[1:2] +; GFX9-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-DPP-NEXT: v_mul_f32_e32 v3, 4.0, v1 +; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-DPP-NEXT: v_mul_f32_e32 v2, 4.0, v0 ; GFX9-DPP-NEXT: .LBB2_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-DPP-NEXT: v_sub_f32_e32 v1, v2, v3 -; GFX9-DPP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX9-DPP-NEXT: v_sub_f32_e32 v0, v1, v2 +; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB2_2 ; GFX9-DPP-NEXT: .LBB2_3: @@ -1474,25 +1470,23 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; ; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_uni_value_one_as_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-DPP-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; GFX1064-DPP-NEXT: s_mov_b32 s10, -1 ; GFX1064-DPP-NEXT: s_mov_b32 s11, 0x31e16000 ; GFX1064-DPP-NEXT: s_add_u32 s8, s8, s3 -; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-DPP-NEXT: s_addc_u32 s9, s9, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s2, 0 +; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB2_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064-DPP-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1064-DPP-NEXT: s_mov_b32 s3, 0xc3300000 +; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064-DPP-NEXT: s_mov_b32 s3, 0x43300000 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-DPP-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1064-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, s[2:3] ; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -1531,9 +1525,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; GFX1032-DPP-NEXT: ; %bb.1: ; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s4, s3 ; GFX1032-DPP-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1032-DPP-NEXT: s_mov_b32 s3, 0xc3300000 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-DPP-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1032-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, s[4:5] ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dword s3, s[0:1], 0x0 @@ -1560,8 +1553,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000 ; GFX1164-DPP-NEXT: v_mov_b32_e32 
v1, s2 ; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX1164-DPP-NEXT: s_mov_b32 s2, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], exec +; GFX1164-DPP-NEXT: s_mov_b64 s[2:3], exec ; GFX1164-DPP-NEXT: s_clause 0x1 ; GFX1164-DPP-NEXT: scratch_store_b32 off, v0, off offset:12 ; GFX1164-DPP-NEXT: scratch_store_b32 off, v1, off offset:8 @@ -1571,10 +1563,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB2_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX1164-DPP-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1164-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -1614,10 +1605,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB2_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0 @@ -2452,8 +2442,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; GFX7LESS-NEXT: s_cbranch_execz .LBB4_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[2:3] -; GFX7LESS-NEXT: s_mov_b32 s7, 0x43300000 ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s7, 0x43300000 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0xc3300000 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 @@ -2497,27 +2487,28 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-NEXT: s_cbranch_execz .LBB4_3 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9-NEXT: s_mov_b32 s3, 0x43300000 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9-NEXT: v_mov_b32_e32 v1, 0xc3300000 -; GFX9-NEXT: v_add_f64 v[1:2], s[2:3], v[0:1] +; GFX9-NEXT: s_mov_b32 s3, 0x43300000 +; GFX9-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_mov_b64 s[2:3], 0 +; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX9-NEXT: v_cvt_f32_f64_e32 v1, v[1:2] +; GFX9-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: v_mul_f32_e32 v3, 4.0, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_mul_f32_e32 v2, 4.0, v0 ; GFX9-NEXT: .LBB4_2: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-NEXT: v_sub_f32_e32 v1, v2, v3 -; GFX9-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX9-NEXT: v_sub_f32_e32 v0, v1, v2 +; GFX9-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX9-NEXT: s_or_b64 
s[2:3], vcc, s[2:3] -; GFX9-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_cbranch_execnz .LBB4_2 ; GFX9-NEXT: .LBB4_3: @@ -2525,25 +2516,23 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 ; GFX1064-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; GFX1064-NEXT: s_mov_b32 s10, -1 ; GFX1064-NEXT: s_mov_b32 s11, 0x31e16000 ; GFX1064-NEXT: s_add_u32 s8, s8, s3 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX1064-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-NEXT: s_addc_u32 s9, s9, 0 -; GFX1064-NEXT: s_mov_b32 s2, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1064-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB4_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1064-NEXT: s_mov_b32 s3, 0xc3300000 +; GFX1064-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064-NEXT: s_mov_b32 s3, 0x43300000 ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1064-NEXT: v_add_f64 v[0:1], 0xc3300000, s[2:3] ; GFX1064-NEXT: v_mov_b32_e32 v3, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -2582,9 +2571,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; GFX1032-NEXT: ; %bb.1: ; GFX1032-NEXT: s_bcnt1_i32_b32 s4, s3 ; GFX1032-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1032-NEXT: s_mov_b32 s3, 0xc3300000 ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1032-NEXT: v_add_f64 v[0:1], 0xc3300000, s[4:5] ; GFX1032-NEXT: v_mov_b32_e32 v3, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dword s3, s[0:1], 0x0 @@ -2611,8 +2599,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; GFX1164-NEXT: v_mov_b32_e32 v0, 0x43300000 ; GFX1164-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX1164-NEXT: s_mov_b32 s2, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], exec +; GFX1164-NEXT: s_mov_b64 s[2:3], exec ; GFX1164-NEXT: s_clause 0x1 ; GFX1164-NEXT: scratch_store_b32 off, v0, off offset:12 ; GFX1164-NEXT: scratch_store_b32 off, v1, off offset:8 @@ -2622,10 +2609,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1164-NEXT: s_cbranch_execz .LBB4_3 ; GFX1164-NEXT: ; %bb.1: -; GFX1164-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1164-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -2665,10 +2651,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1132-NEXT: s_cbranch_execz .LBB4_3 ; GFX1132-NEXT: ; %bb.1: -; GFX1132-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1132-NEXT: s_load_b64 s[0:1], 
s[0:1], 0x24 ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v3, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0 @@ -2706,27 +2691,28 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB4_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9-DPP-NEXT: s_mov_b32 s3, 0x43300000 ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0xc3300000 -; GFX9-DPP-NEXT: v_add_f64 v[1:2], s[2:3], v[0:1] +; GFX9-DPP-NEXT: s_mov_b32 s3, 0x43300000 +; GFX9-DPP-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] ; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX9-DPP-NEXT: v_cvt_f32_f64_e32 v1, v[1:2] +; GFX9-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-DPP-NEXT: v_mul_f32_e32 v3, 4.0, v1 +; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-DPP-NEXT: v_mul_f32_e32 v2, 4.0, v0 ; GFX9-DPP-NEXT: .LBB4_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-DPP-NEXT: v_sub_f32_e32 v1, v2, v3 -; GFX9-DPP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX9-DPP-NEXT: v_sub_f32_e32 v0, v1, v2 +; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB4_2 ; GFX9-DPP-NEXT: .LBB4_3: @@ -2734,25 +2720,23 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-DPP-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; GFX1064-DPP-NEXT: s_mov_b32 s10, -1 ; GFX1064-DPP-NEXT: s_mov_b32 s11, 0x31e16000 ; GFX1064-DPP-NEXT: s_add_u32 s8, s8, s3 -; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-DPP-NEXT: s_addc_u32 s9, s9, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s2, 0 +; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB4_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064-DPP-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1064-DPP-NEXT: s_mov_b32 s3, 0xc3300000 +; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064-DPP-NEXT: s_mov_b32 s3, 0x43300000 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-DPP-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1064-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, s[2:3] ; 
GFX1064-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -2791,9 +2775,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; GFX1032-DPP-NEXT: ; %bb.1: ; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s4, s3 ; GFX1032-DPP-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1032-DPP-NEXT: s_mov_b32 s3, 0xc3300000 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-DPP-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1032-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, s[4:5] ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dword s3, s[0:1], 0x0 @@ -2820,8 +2803,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX1164-DPP-NEXT: s_mov_b32 s2, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], exec +; GFX1164-DPP-NEXT: s_mov_b64 s[2:3], exec ; GFX1164-DPP-NEXT: s_clause 0x1 ; GFX1164-DPP-NEXT: scratch_store_b32 off, v0, off offset:12 ; GFX1164-DPP-NEXT: scratch_store_b32 off, v1, off offset:8 @@ -2831,10 +2813,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB4_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX1164-DPP-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1164-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -2874,10 +2855,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB4_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0 @@ -4504,8 +4484,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop ; GFX7LESS-NEXT: s_cbranch_execz .LBB7_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[2:3] -; GFX7LESS-NEXT: s_mov_b32 s7, 0x43300000 ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s7, 0x43300000 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0xc3300000 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], 0 @@ -4549,27 +4529,28 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-NEXT: s_cbranch_execz .LBB7_3 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9-NEXT: s_mov_b32 s3, 0x43300000 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9-NEXT: v_mov_b32_e32 v1, 0xc3300000 -; GFX9-NEXT: v_add_f64 v[1:2], s[2:3], v[0:1] +; GFX9-NEXT: s_mov_b32 s3, 0x43300000 +; GFX9-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] ; GFX9-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_mov_b64 s[2:3], 0 +; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX9-NEXT: v_cvt_f32_f64_e32 v1, v[1:2] +; GFX9-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: v_mul_f32_e32 v3, 4.0, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_mul_f32_e32 v2, 4.0, v0 ; GFX9-NEXT: .LBB7_2: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-NEXT: v_sub_f32_e32 v1, v2, v3 -; GFX9-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX9-NEXT: v_sub_f32_e32 v0, v1, v2 +; GFX9-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX9-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_cbranch_execnz .LBB7_2 ; GFX9-NEXT: .LBB7_3: @@ -4577,25 +4558,23 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop ; ; GFX1064-LABEL: global_atomic_fsub_uni_address_uni_value_defalut_scope_strictfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 ; GFX1064-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; GFX1064-NEXT: s_mov_b32 s10, -1 ; GFX1064-NEXT: s_mov_b32 s11, 0x31e16000 ; GFX1064-NEXT: s_add_u32 s8, s8, s3 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX1064-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-NEXT: s_addc_u32 s9, s9, 0 -; GFX1064-NEXT: s_mov_b32 s2, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1064-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB7_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1064-NEXT: s_mov_b32 s3, 0xc3300000 +; GFX1064-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064-NEXT: s_mov_b32 s3, 0x43300000 ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1064-NEXT: v_add_f64 v[0:1], 0xc3300000, s[2:3] ; GFX1064-NEXT: v_mov_b32_e32 v3, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -4634,9 +4613,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop ; GFX1032-NEXT: ; %bb.1: ; GFX1032-NEXT: s_bcnt1_i32_b32 s4, s3 ; GFX1032-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1032-NEXT: s_mov_b32 s3, 0xc3300000 ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1032-NEXT: v_add_f64 v[0:1], 0xc3300000, s[4:5] ; GFX1032-NEXT: v_mov_b32_e32 v3, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dword s3, s[0:1], 0x0 @@ -4663,8 +4641,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop ; GFX1164-NEXT: v_mov_b32_e32 v0, 0x43300000 ; GFX1164-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX1164-NEXT: s_mov_b32 s2, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], exec +; GFX1164-NEXT: s_mov_b64 s[2:3], exec ; GFX1164-NEXT: s_clause 0x1 ; GFX1164-NEXT: scratch_store_b32 off, v0, off offset:12 ; GFX1164-NEXT: scratch_store_b32 off, v1, off offset:8 @@ -4674,10 
+4651,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1164-NEXT: s_cbranch_execz .LBB7_3 ; GFX1164-NEXT: ; %bb.1: -; GFX1164-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1164-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -4717,10 +4693,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1132-NEXT: s_cbranch_execz .LBB7_3 ; GFX1132-NEXT: ; %bb.1: -; GFX1132-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1132-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v3, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b32 s3, s[0:1], 0x0 @@ -4758,27 +4733,28 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9-DPP-NEXT: s_mov_b32 s3, 0x43300000 ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0xc3300000 -; GFX9-DPP-NEXT: v_add_f64 v[1:2], s[2:3], v[0:1] +; GFX9-DPP-NEXT: s_mov_b32 s3, 0x43300000 +; GFX9-DPP-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] ; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[2:3], 0 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX9-DPP-NEXT: v_cvt_f32_f64_e32 v1, v[1:2] +; GFX9-DPP-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-DPP-NEXT: v_mul_f32_e32 v3, 4.0, v1 +; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-DPP-NEXT: v_mul_f32_e32 v2, 4.0, v0 ; GFX9-DPP-NEXT: .LBB7_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-DPP-NEXT: v_sub_f32_e32 v1, v2, v3 -; GFX9-DPP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc +; GFX9-DPP-NEXT: v_sub_f32_e32 v0, v1, v2 +; GFX9-DPP-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX9-DPP-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-DPP-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX9-DPP-NEXT: .LBB7_3: @@ -4786,25 +4762,23 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop ; ; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_uni_value_defalut_scope_strictfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-DPP-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; GFX1064-DPP-NEXT: s_mov_b32 s10, -1 ; GFX1064-DPP-NEXT: s_mov_b32 s11, 0x31e16000 ; GFX1064-DPP-NEXT: s_add_u32 s8, s8, s3 -; GFX1064-DPP-NEXT: 
v_mbcnt_hi_u32_b32 v0, s5, v0 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-DPP-NEXT: s_addc_u32 s9, s9, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s2, 0 +; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064-DPP-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1064-DPP-NEXT: s_mov_b32 s3, 0xc3300000 +; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064-DPP-NEXT: s_mov_b32 s3, 0x43300000 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-DPP-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1064-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, s[2:3] ; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -4843,9 +4817,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop ; GFX1032-DPP-NEXT: ; %bb.1: ; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s4, s3 ; GFX1032-DPP-NEXT: s_mov_b32 s5, 0x43300000 -; GFX1032-DPP-NEXT: s_mov_b32 s3, 0xc3300000 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-DPP-NEXT: v_add_f64 v[0:1], s[4:5], s[2:3] +; GFX1032-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, s[4:5] ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dword s3, s[0:1], 0x0 @@ -4872,8 +4845,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX1164-DPP-NEXT: s_mov_b32 s2, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], exec +; GFX1164-DPP-NEXT: s_mov_b64 s[2:3], exec ; GFX1164-DPP-NEXT: s_clause 0x1 ; GFX1164-DPP-NEXT: scratch_store_b32 off, v0, off offset:12 ; GFX1164-DPP-NEXT: scratch_store_b32 off, v1, off offset:8 @@ -4883,10 +4855,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX1164-DPP-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1164-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -4926,10 +4897,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v2 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_mov_b32 s3, 0xc3300000 -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX1132-DPP-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] +; GFX1132-DPP-NEXT: v_add_f64 v[0:1], 0xc3300000, v[0:1] +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b32 s3, s[0:1], 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.ll index 173e4f656c90de..a290dd5fd145c9 100644 --- 
a/llvm/test/CodeGen/AMDGPU/inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.ll @@ -205,9 +205,8 @@ entry: ; FIXME: Should not have intermediate sgprs ; CHECK-LABEL: {{^}}i64_imm_input_phys_vgpr: -; CHECK: s_mov_b64 s[0:1], 0x1e240 -; CHECK: v_mov_b32_e32 v0, s0 -; CHECK: v_mov_b32_e32 v1, s1 +; CHECK: v_mov_b32_e32 v0, 0x1e240 +; CHECK: v_mov_b32_e32 v1, 0 ; CHECK: use v[0:1] define amdgpu_kernel void @i64_imm_input_phys_vgpr() { entry: diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll index f5d41b246b1b8c..220ea962b9e1dc 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll @@ -85,17 +85,18 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg ; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12 ; GFX11-NEXT: s_mov_b32 s13, s14 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: s_mov_b32 s21, s14 +; GFX11-NEXT: s_mov_b32 s3, s14 ; GFX11-NEXT: s_mov_b32 s14, s15 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_mov_b32 s14, s21 +; GFX11-NEXT: s_mov_b32 s14, s3 ; GFX11-NEXT: s_mov_b32 s1, -1 -; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s3 -; GFX11-NEXT: s_cbranch_vccz .LBB2_4 +; GFX11-NEXT: s_cbranch_execz .LBB2_4 ; GFX11-NEXT: s_branch .LBB2_12 ; GFX11-NEXT: .LBB2_3: ; GFX11-NEXT: s_mov_b32 s1, 0 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 +; GFX11-NEXT: s_cbranch_vccnz .LBB2_12 ; GFX11-NEXT: .LBB2_4: ; %bb16 ; GFX11-NEXT: s_load_b32 s2, s[16:17], 0x54 ; GFX11-NEXT: s_bitcmp1_b32 s23, 0 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll index ae470efc92feee..3bc503e3714fe5 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll @@ -289,16 +289,15 @@ entry: define amdgpu_kernel void @half4_inselt(ptr addrspace(1) %out, <4 x half> %vec, i32 %sel) { ; GCN-LABEL: half4_inselt: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_load_dword s7, s[0:1], 0x34 +; GCN-NEXT: s_load_dword s6, s[0:1], 0x34 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GCN-NEXT: s_mov_b64 s[4:5], 0xffff -; GCN-NEXT: s_mov_b32 s6, 0x3c003c00 +; GCN-NEXT: s_mov_b32 s4, 0x3c003c00 +; GCN-NEXT: s_mov_b32 s5, s4 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshl_b32 s7, s7, 4 -; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], s7 -; GCN-NEXT: s_mov_b32 s7, s6 -; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5] -; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] +; GCN-NEXT: s_lshl_b32 s6, s6, 4 +; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6 +; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7] +; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] ; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3] ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: v_mov_b32_e32 v2, s2 @@ -419,16 +418,15 @@ entry: define amdgpu_kernel void @short4_inselt(ptr addrspace(1) %out, <4 x i16> %vec, i32 %sel) { ; GCN-LABEL: short4_inselt: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_load_dword s7, s[0:1], 0x34 +; GCN-NEXT: s_load_dword s6, s[0:1], 0x34 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GCN-NEXT: s_mov_b64 s[4:5], 0xffff -; GCN-NEXT: s_mov_b32 s6, 0x10001 +; GCN-NEXT: s_mov_b32 s4, 0x10001 +; GCN-NEXT: s_mov_b32 s5, s4 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshl_b32 s7, s7, 4 -; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], s7 -; GCN-NEXT: s_mov_b32 s7, s6 -; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5] -; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] +; 
GCN-NEXT: s_lshl_b32 s6, s6, 4 +; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6 +; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7] +; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] ; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3] ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: v_mov_b32_e32 v2, s2 @@ -445,12 +443,11 @@ entry: define amdgpu_kernel void @byte8_inselt(ptr addrspace(1) %out, <8 x i8> %vec, i32 %sel) { ; GCN-LABEL: byte8_inselt: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_load_dword s6, s[0:1], 0x34 +; GCN-NEXT: s_load_dword s4, s[0:1], 0x34 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GCN-NEXT: s_mov_b64 s[4:5], 0xff ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshl_b32 s6, s6, 3 -; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], s6 +; GCN-NEXT: s_lshl_b32 s4, s4, 3 +; GCN-NEXT: s_lshl_b64 s[4:5], 0xff, s4 ; GCN-NEXT: s_and_b32 s7, s5, 0x1010101 ; GCN-NEXT: s_and_b32 s6, s4, 0x1010101 ; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll index c58dbd6bd12069..68427e8937bb94 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -1550,10 +1550,9 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(ptr addrspace(1) %out, <3 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_lshl_b32 s0, s8, 4 ; SI-NEXT: s_mov_b32 s5, s1 -; SI-NEXT: s_lshl_b32 s8, s8, 4 -; SI-NEXT: s_mov_b64 s[0:1], 0xffff -; SI-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 +; SI-NEXT: s_lshl_b64 s[0:1], 0xffff, s0 ; SI-NEXT: s_and_b32 s9, s1, 0x50005 ; SI-NEXT: s_and_b32 s8, s0, 0x50005 ; SI-NEXT: s_andn2_b64 s[0:1], s[2:3], s[0:1] @@ -1572,11 +1571,10 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(ptr addrspace(1) %out, <3 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_mov_b32 s4, s0 -; VI-NEXT: s_mov_b32 s5, s1 -; VI-NEXT: s_lshl_b32 s8, s8, 4 -; VI-NEXT: s_mov_b64 s[0:1], 0xffff -; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 +; VI-NEXT: s_lshl_b32 s0, s8, 4 ; VI-NEXT: s_mov_b32 s8, 0x50005 +; VI-NEXT: s_mov_b32 s5, s1 +; VI-NEXT: s_lshl_b64 s[0:1], 0xffff, s0 ; VI-NEXT: s_mov_b32 s9, s8 ; VI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1] ; VI-NEXT: s_and_b64 s[0:1], s[0:1], s[8:9] @@ -1725,17 +1723,16 @@ define amdgpu_kernel void @s_dynamic_insertelement_v8i8(ptr addrspace(1) %out, p ; SI-NEXT: s_mov_b32 s7, 0x100f000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_lshl_b32 s0, s8, 3 ; SI-NEXT: s_mov_b32 s5, s1 -; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; SI-NEXT: s_lshl_b32 s8, s8, 3 -; SI-NEXT: s_mov_b64 s[2:3], 0xff -; SI-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 -; SI-NEXT: s_and_b32 s9, s3, 0x5050505 -; SI-NEXT: s_and_b32 s8, s2, 0x5050505 +; SI-NEXT: s_lshl_b64 s[0:1], 0xff, s0 +; SI-NEXT: s_and_b32 s9, s1, 0x5050505 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] -; SI-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] +; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1] +; SI-NEXT: s_and_b32 s8, s0, 0x5050505 +; SI-NEXT: s_or_b64 s[0:1], s[8:9], s[2:3] ; SI-NEXT: v_mov_b32_e32 v0, s0 ; SI-NEXT: v_mov_b32_e32 v1, s1 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -1748,17 +1745,16 @@ define amdgpu_kernel void @s_dynamic_insertelement_v8i8(ptr addrspace(1) %out, p ; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: 
s_load_dwordx2 s[2:3], s[2:3], 0x0 ; VI-NEXT: s_mov_b32 s4, s0 +; VI-NEXT: s_lshl_b32 s0, s8, 3 ; VI-NEXT: s_mov_b32 s5, s1 -; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; VI-NEXT: s_lshl_b32 s8, s8, 3 -; VI-NEXT: s_mov_b64 s[2:3], 0xff -; VI-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 -; VI-NEXT: s_and_b32 s9, s3, 0x5050505 -; VI-NEXT: s_and_b32 s8, s2, 0x5050505 +; VI-NEXT: s_lshl_b64 s[0:1], 0xff, s0 +; VI-NEXT: s_and_b32 s9, s1, 0x5050505 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3] -; VI-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] +; VI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1] +; VI-NEXT: s_and_b32 s8, s0, 0x5050505 +; VI-NEXT: s_or_b64 s[0:1], s[8:9], s[2:3] ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll index f98b41ba199bd7..47f7943e076a4a 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -2064,19 +2064,17 @@ define amdgpu_kernel void @v_insertelement_v4i16_dynamic_vgpr(ptr addrspace(1) % ; ; GFX11-LABEL: v_insertelement_v4i16_dynamic_vgpr: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 -; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x10 ; GFX11-NEXT: global_load_b32 v2, v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0 -; GFX11-NEXT: s_mov_b64 s[0:1], 0xffff +; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x10 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_load_b64 v[0:1], v4, s[6:7] +; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s0 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 4, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b64 v[2:3], v2, s[0:1] -; GFX11-NEXT: s_pack_ll_b32_b16 s0, s2, s2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b64 v[2:3], v2, 0xffff ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_bfi_b32 v1, v3, s0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -2106,12 +2104,11 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(ptr addrspace(1) % ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] -; GFX9-NEXT: s_mov_b64 s[2:3], 0xffff -; GFX9-NEXT: s_lshl_b32 s4, s7, 4 -; GFX9-NEXT: s_pack_ll_b32_b16 s5, s6, s6 -; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 -; GFX9-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-NEXT: v_mov_b32_e32 v4, s5 +; GFX9-NEXT: s_lshl_b32 s2, s7, 4 +; GFX9-NEXT: s_pack_ll_b32_b16 s4, s6, s6 +; GFX9-NEXT: s_lshl_b64 s[2:3], 0xffff, s2 +; GFX9-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NEXT: v_mov_b32_e32 v4, s4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_bfi_b32 v1, s3, v3, v1 ; GFX9-NEXT: v_bfi_b32 v0, s2, v4, v0 @@ -2128,14 +2125,13 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(ptr addrspace(1) % ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; VI-NEXT: s_mov_b64 s[2:3], 0xffff ; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: s_lshl_b32 s1, s5, 4 -; VI-NEXT: s_lshl_b32 s5, s4, 16 -; VI-NEXT: s_and_b32 s4, s4, 0xffff +; VI-NEXT: s_lshl_b32 s1, s4, 16 +; VI-NEXT: s_and_b32 s2, s4, 0xffff +; VI-NEXT: s_lshl_b32 s3, s5, 4 +; VI-NEXT: s_or_b32 s2, s2, s1 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; 
VI-NEXT: s_lshl_b64 s[0:1], s[2:3], s1 -; VI-NEXT: s_or_b32 s2, s4, s5 +; VI-NEXT: s_lshl_b64 s[0:1], 0xffff, s3 ; VI-NEXT: v_mov_b32_e32 v4, s2 ; VI-NEXT: v_mov_b32_e32 v5, s2 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc @@ -2155,14 +2151,13 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(ptr addrspace(1) % ; CI-NEXT: v_add_i32_e32 v0, vcc, s2, v2 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; CI-NEXT: s_mov_b64 s[2:3], 0xffff ; CI-NEXT: v_mov_b32_e32 v3, s1 -; CI-NEXT: s_and_b32 s6, s4, 0xffff -; CI-NEXT: s_lshl_b32 s1, s5, 4 -; CI-NEXT: s_lshl_b32 s4, s4, 16 +; CI-NEXT: s_and_b32 s1, s4, 0xffff +; CI-NEXT: s_lshl_b32 s2, s4, 16 +; CI-NEXT: s_lshl_b32 s3, s5, 4 +; CI-NEXT: s_or_b32 s2, s1, s2 ; CI-NEXT: v_add_i32_e32 v2, vcc, s0, v2 -; CI-NEXT: s_lshl_b64 s[0:1], s[2:3], s1 -; CI-NEXT: s_or_b32 s2, s6, s4 +; CI-NEXT: s_lshl_b64 s[0:1], 0xffff, s3 ; CI-NEXT: v_mov_b32_e32 v4, s2 ; CI-NEXT: v_mov_b32_e32 v5, s2 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc @@ -2177,15 +2172,14 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(ptr addrspace(1) % ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x10 -; GFX11-NEXT: s_mov_b64 s[2:3], 0xffff ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[6:7] ; GFX11-NEXT: s_lshl_b32 s1, s1, 4 -; GFX11-NEXT: s_pack_ll_b32_b16 s6, s0, s0 -; GFX11-NEXT: s_lshl_b64 s[0:1], s[2:3], s1 +; GFX11-NEXT: s_pack_ll_b32_b16 s2, s0, s0 +; GFX11-NEXT: s_lshl_b64 s[0:1], 0xffff, s1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_bfi_b32 v1, s1, s6, v1 -; GFX11-NEXT: v_bfi_b32 v0, s0, s6, v0 +; GFX11-NEXT: v_bfi_b32 v1, s1, s2, v1 +; GFX11-NEXT: v_bfi_b32 v0, s0, s2, v0 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) diff --git a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll index 741164bc045062..29a96c227f2f0a 100644 --- a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll +++ b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll @@ -29,7 +29,7 @@ declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) #1 define internal fastcc void @svm_node_closure_bsdf(ptr addrspace(1) %sd, ptr %stack, <4 x i32> %node, ptr %offset, i32 %0, i8 %trunc, float %1, float %2, float %mul80, i1 %cmp412.old, <4 x i32> %3, float %4, i32 %5, i1 %cmp440, i1 %cmp442, i1 %or.cond1306, float %.op, ptr addrspace(1) %arrayidx.i.i2202, ptr addrspace(1) %retval.0.i.i22089, ptr addrspace(1) %retval.1.i221310, i1 %cmp575, ptr addrspace(1) %num_closure_left.i2215, i32 %6, i1 %cmp.i2216, i32 %7, i64 %idx.ext.i2223, i32 %sub5.i2221) #2 { ; GCN-LABEL: {{^}}svm_node_closure_bsdf: ; GCN-NOT: v_writelane_b32 -; GCN: s_movk_i32 s28, 0x60 +; GCN: s_movk_i32 s26, 0x60 ; GCN-NOT: s31 ; GCN-NOT: v_readlane_b32 ; GCN: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll index dee5b724934a0b..31295f2a543f2d 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll @@ -674,98 +674,91 @@ define amdgpu_kernel void @lds_ds_fmax(ptr addrspace(5) %out, ptr addrspace(3) % define amdgpu_kernel void @lds_ds_fmin_f64(ptr addrspace(5) %out, ptr addrspace(3) %ptrf, i32 %idx) { ; SI-LABEL: lds_ds_fmin_f64: ; SI: ; %bb.0: -; 
SI-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; SI-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; SI-NEXT: s_load_dword s4, s[0:1], 0xb +; SI-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 +; SI-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1 +; SI-NEXT: s_load_dword s2, s[0:1], 0xb ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s10, -1 -; SI-NEXT: s_mov_b32 s11, 0xe8f000 -; SI-NEXT: s_add_u32 s8, s8, s3 -; SI-NEXT: s_addc_u32 s9, s9, 0 -; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s7, 0xe8f000 +; SI-NEXT: s_add_u32 s4, s4, s3 +; SI-NEXT: s_addc_u32 s5, s5, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_lshl_b32 s5, s4, 4 -; SI-NEXT: s_lshl_b32 s4, s4, 3 -; SI-NEXT: s_mov_b32 s3, 0x40450000 -; SI-NEXT: s_add_i32 s4, s4, 32 -; SI-NEXT: v_mov_b32_e32 v0, s2 -; SI-NEXT: v_mov_b32_e32 v2, s4 -; SI-NEXT: v_mov_b32_e32 v1, s3 +; SI-NEXT: s_lshl_b32 s3, s2, 4 +; SI-NEXT: s_lshl_b32 s2, s2, 3 +; SI-NEXT: v_mov_b32_e32 v0, 0 +; SI-NEXT: s_add_i32 s2, s2, 32 +; SI-NEXT: v_mov_b32_e32 v1, 0x40450000 +; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: s_mov_b32 m0, -1 ; SI-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] -; SI-NEXT: s_add_i32 s2, s5, 64 -; SI-NEXT: v_mov_b32_e32 v4, s2 -; SI-NEXT: ds_min_f64 v4, v[0:1] -; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: v_mov_b32_e32 v4, s1 +; SI-NEXT: s_add_i32 s1, s3, 64 +; SI-NEXT: v_mov_b32_e32 v5, s1 +; SI-NEXT: ds_min_f64 v5, v[0:1] ; SI-NEXT: s_waitcnt lgkmcnt(1) -; SI-NEXT: ds_min_rtn_f64 v[0:1], v0, v[2:3] +; SI-NEXT: ds_min_rtn_f64 v[0:1], v4, v[2:3] ; SI-NEXT: s_add_i32 s1, s0, 4 -; SI-NEXT: v_mov_b32_e32 v2, s1 +; SI-NEXT: v_mov_b32_e32 v3, s1 +; SI-NEXT: v_mov_b32_e32 v2, s0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v1, s0 -; SI-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; SI-NEXT: buffer_store_dword v1, v3, s[4:7], 0 offen +; SI-NEXT: buffer_store_dword v0, v2, s[4:7], 0 offen ; SI-NEXT: s_endpgm ; ; GFX7-LABEL: lds_ds_fmin_f64: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX7-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX7-NEXT: s_mov_b32 s10, -1 -; GFX7-NEXT: s_load_dword s4, s[0:1], 0xb +; GFX7-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 +; GFX7-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0xb ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX7-NEXT: s_mov_b32 s11, 0xe8f000 -; GFX7-NEXT: s_add_u32 s8, s8, s3 -; GFX7-NEXT: s_mov_b32 s2, 0 -; GFX7-NEXT: s_mov_b32 s3, 0x40450000 -; GFX7-NEXT: v_mov_b32_e32 v0, s2 -; GFX7-NEXT: s_addc_u32 s9, s9, 0 -; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: s_mov_b32 s7, 0xe8f000 +; GFX7-NEXT: s_add_u32 s4, s4, s3 +; GFX7-NEXT: s_addc_u32 s5, s5, 0 +; GFX7-NEXT: v_mov_b32_e32 v0, 0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshl_b32 s2, s4, 3 -; GFX7-NEXT: v_mov_b32_e32 v2, s2 +; GFX7-NEXT: s_lshl_b32 s3, s2, 3 +; GFX7-NEXT: v_mov_b32_e32 v1, 0x40450000 +; GFX7-NEXT: v_mov_b32_e32 v2, s3 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] offset:32 -; GFX7-NEXT: s_lshl_b32 s2, s4, 4 -; GFX7-NEXT: v_mov_b32_e32 v4, s2 -; GFX7-NEXT: ds_min_f64 v4, v[0:1] offset:64 -; GFX7-NEXT: v_mov_b32_e32 v0, s1 +; GFX7-NEXT: s_lshl_b32 s2, s2, 4 +; GFX7-NEXT: v_mov_b32_e32 v5, s2 +; GFX7-NEXT: v_mov_b32_e32 v4, s1 +; GFX7-NEXT: ds_min_f64 v5, v[0:1] offset:64 ; GFX7-NEXT: s_waitcnt lgkmcnt(1) -; GFX7-NEXT: ds_min_rtn_f64 v[0:1], v0, v[2:3] +; GFX7-NEXT: ds_min_rtn_f64 v[0:1], 
v4, v[2:3] ; GFX7-NEXT: s_add_i32 s1, s0, 4 ; GFX7-NEXT: v_mov_b32_e32 v3, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: buffer_store_dword v1, v3, s[8:11], 0 offen -; GFX7-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen +; GFX7-NEXT: buffer_store_dword v1, v3, s[4:7], 0 offen +; GFX7-NEXT: buffer_store_dword v0, v2, s[4:7], 0 offen ; GFX7-NEXT: s_endpgm ; ; VI-LABEL: lds_ds_fmin_f64: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s90, -1 -; VI-NEXT: s_load_dword s4, s[0:1], 0x2c +; VI-NEXT: s_load_dword s2, s[0:1], 0x2c ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s90, -1 ; VI-NEXT: s_mov_b32 s91, 0xe80000 ; VI-NEXT: s_add_u32 s88, s88, s3 -; VI-NEXT: s_mov_b32 s2, 0 -; VI-NEXT: s_mov_b32 s3, 0x40450000 -; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: s_addc_u32 s89, s89, 0 -; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_lshl_b32 s2, s4, 3 -; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: s_lshl_b32 s3, s2, 3 +; VI-NEXT: v_mov_b32_e32 v1, 0x40450000 +; VI-NEXT: v_mov_b32_e32 v2, s3 ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] offset:32 -; VI-NEXT: s_lshl_b32 s2, s4, 4 -; VI-NEXT: v_mov_b32_e32 v4, s2 -; VI-NEXT: ds_min_f64 v4, v[0:1] offset:64 -; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: s_lshl_b32 s2, s2, 4 +; VI-NEXT: v_mov_b32_e32 v5, s2 +; VI-NEXT: v_mov_b32_e32 v4, s1 +; VI-NEXT: ds_min_f64 v5, v[0:1] offset:64 ; VI-NEXT: s_waitcnt lgkmcnt(1) -; VI-NEXT: ds_min_rtn_f64 v[0:1], v0, v[2:3] +; VI-NEXT: ds_min_rtn_f64 v[0:1], v4, v[2:3] ; VI-NEXT: s_add_i32 s1, s0, 4 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 @@ -783,11 +776,9 @@ define amdgpu_kernel void @lds_ds_fmin_f64(ptr addrspace(5) %out, ptr addrspace( ; GFX9-NEXT: s_add_u32 s8, s8, s3 ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s0, 0 -; GFX9-NEXT: s_mov_b32 s1, 0x40450000 -; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: s_addc_u32 s9, s9, 0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40450000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s0, s4, 3 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 @@ -814,17 +805,15 @@ define amdgpu_kernel void @lds_ds_fmin_f64(ptr addrspace(5) %out, ptr addrspace( ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c ; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; GFX10-NEXT: s_mov_b32 s0, 0 ; GFX10-NEXT: s_addc_u32 s9, s9, 0 -; GFX10-NEXT: s_mov_b32 s1, 0x40450000 -; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: v_mov_b32_e32 v1, 0x40450000 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshl_b32 s5, s4, 3 +; GFX10-NEXT: s_lshl_b32 s0, s4, 3 +; GFX10-NEXT: v_mov_b32_e32 v5, s3 +; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: s_lshl_b32 s0, s4, 4 -; GFX10-NEXT: v_mov_b32_e32 v2, s5 ; GFX10-NEXT: v_mov_b32_e32 v4, s0 -; GFX10-NEXT: v_mov_b32_e32 v5, s3 ; GFX10-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] offset:32 ; GFX10-NEXT: ds_min_f64 v4, v[0:1] offset:64 ; GFX10-NEXT: s_waitcnt lgkmcnt(1) @@ -838,16 +827,14 @@ define amdgpu_kernel void @lds_ds_fmin_f64(ptr addrspace(5) %out, ptr addrspace( ; GFX11-LABEL: lds_ds_fmin_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x2c +; GFX11-NEXT: 
s_load_b32 s2, s[0:1], 0x2c ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s2, 0 -; GFX11-NEXT: s_mov_b32 s3, 0x40450000 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_lshl_b32 s5, s4, 3 -; GFX11-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v2, s5 -; GFX11-NEXT: s_lshl_b32 s2, s4, 4 +; GFX11-NEXT: s_lshl_b32 s3, s2, 3 +; GFX11-NEXT: v_mov_b32_e32 v5, s1 +; GFX11-NEXT: v_dual_mov_b32 v1, 0x40450000 :: v_dual_mov_b32 v2, s3 +; GFX11-NEXT: s_lshl_b32 s2, s2, 4 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v4, s2 ; GFX11-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] offset:32 @@ -870,11 +857,11 @@ define amdgpu_kernel void @lds_ds_fmin_f64(ptr addrspace(5) %out, ptr addrspace( ; G_SI-NEXT: s_mov_b32 s2, 0 ; G_SI-NEXT: s_addc_u32 s9, s9, 0 ; G_SI-NEXT: s_mov_b32 s3, 0x40450000 -; G_SI-NEXT: v_mov_b32_e32 v0, s2 ; G_SI-NEXT: s_waitcnt lgkmcnt(0) ; G_SI-NEXT: s_add_i32 s4, s4, 4 -; G_SI-NEXT: v_mov_b32_e32 v1, s3 +; G_SI-NEXT: v_mov_b32_e32 v0, s2 ; G_SI-NEXT: s_lshl_b32 s2, s4, 3 +; G_SI-NEXT: v_mov_b32_e32 v1, s3 ; G_SI-NEXT: v_mov_b32_e32 v2, s2 ; G_SI-NEXT: s_mov_b32 m0, -1 ; G_SI-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] @@ -904,11 +891,11 @@ define amdgpu_kernel void @lds_ds_fmin_f64(ptr addrspace(5) %out, ptr addrspace( ; G_GFX7-NEXT: s_mov_b32 s2, 0 ; G_GFX7-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX7-NEXT: s_mov_b32 s3, 0x40450000 -; G_GFX7-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX7-NEXT: s_add_i32 s4, s4, 4 -; G_GFX7-NEXT: v_mov_b32_e32 v1, s3 +; G_GFX7-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX7-NEXT: s_lshl_b32 s2, s4, 3 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX7-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX7-NEXT: s_mov_b32 m0, -1 ; G_GFX7-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] @@ -938,11 +925,11 @@ define amdgpu_kernel void @lds_ds_fmin_f64(ptr addrspace(5) %out, ptr addrspace( ; G_VI-NEXT: s_mov_b32 s2, 0 ; G_VI-NEXT: s_addc_u32 s89, s89, 0 ; G_VI-NEXT: s_mov_b32 s3, 0x40450000 -; G_VI-NEXT: v_mov_b32_e32 v0, s2 ; G_VI-NEXT: s_waitcnt lgkmcnt(0) ; G_VI-NEXT: s_add_i32 s4, s4, 4 -; G_VI-NEXT: v_mov_b32_e32 v1, s3 +; G_VI-NEXT: v_mov_b32_e32 v0, s2 ; G_VI-NEXT: s_lshl_b32 s2, s4, 3 +; G_VI-NEXT: v_mov_b32_e32 v1, s3 ; G_VI-NEXT: v_mov_b32_e32 v2, s2 ; G_VI-NEXT: s_mov_b32 m0, -1 ; G_VI-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] @@ -972,11 +959,11 @@ define amdgpu_kernel void @lds_ds_fmin_f64(ptr addrspace(5) %out, ptr addrspace( ; G_GFX9-NEXT: s_mov_b32 s0, 0 ; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX9-NEXT: s_mov_b32 s1, 0x40450000 -; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: s_add_i32 s4, s4, 4 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 +; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] ; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 @@ -1031,10 +1018,10 @@ define amdgpu_kernel void @lds_ds_fmin_f64(ptr addrspace(5) %out, ptr addrspace( ; G_GFX11-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX11-NEXT: s_add_i32 s4, s2, 4 ; G_GFX11-NEXT: s_mov_b32 s2, 0 -; G_GFX11-NEXT: s_lshl_b32 s5, s4, 3 ; G_GFX11-NEXT: s_mov_b32 s3, 0x40450000 +; G_GFX11-NEXT: s_lshl_b32 s5, s4, 3 ; G_GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v5, s1 -; G_GFX11-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v1, 
s3 +; G_GFX11-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v2, s5 ; G_GFX11-NEXT: s_lshl_b32 s2, s4, 4 ; G_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; G_GFX11-NEXT: v_mov_b32_e32 v4, s2 @@ -1060,98 +1047,91 @@ define amdgpu_kernel void @lds_ds_fmin_f64(ptr addrspace(5) %out, ptr addrspace( define amdgpu_kernel void @lds_ds_fmax_f64(ptr addrspace(5) %out, ptr addrspace(3) %ptrf, i32 %idx) { ; SI-LABEL: lds_ds_fmax_f64: ; SI: ; %bb.0: -; SI-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; SI-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; SI-NEXT: s_load_dword s4, s[0:1], 0xb +; SI-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 +; SI-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1 +; SI-NEXT: s_load_dword s2, s[0:1], 0xb ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s10, -1 -; SI-NEXT: s_mov_b32 s11, 0xe8f000 -; SI-NEXT: s_add_u32 s8, s8, s3 -; SI-NEXT: s_addc_u32 s9, s9, 0 -; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s7, 0xe8f000 +; SI-NEXT: s_add_u32 s4, s4, s3 +; SI-NEXT: s_addc_u32 s5, s5, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_lshl_b32 s5, s4, 4 -; SI-NEXT: s_lshl_b32 s4, s4, 3 -; SI-NEXT: s_mov_b32 s3, 0x40450000 -; SI-NEXT: s_add_i32 s4, s4, 32 -; SI-NEXT: v_mov_b32_e32 v0, s2 -; SI-NEXT: v_mov_b32_e32 v2, s4 -; SI-NEXT: v_mov_b32_e32 v1, s3 +; SI-NEXT: s_lshl_b32 s3, s2, 4 +; SI-NEXT: s_lshl_b32 s2, s2, 3 +; SI-NEXT: v_mov_b32_e32 v0, 0 +; SI-NEXT: s_add_i32 s2, s2, 32 +; SI-NEXT: v_mov_b32_e32 v1, 0x40450000 +; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: s_mov_b32 m0, -1 ; SI-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] -; SI-NEXT: s_add_i32 s2, s5, 64 -; SI-NEXT: v_mov_b32_e32 v4, s2 -; SI-NEXT: ds_max_f64 v4, v[0:1] -; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: v_mov_b32_e32 v4, s1 +; SI-NEXT: s_add_i32 s1, s3, 64 +; SI-NEXT: v_mov_b32_e32 v5, s1 +; SI-NEXT: ds_max_f64 v5, v[0:1] ; SI-NEXT: s_waitcnt lgkmcnt(1) -; SI-NEXT: ds_max_rtn_f64 v[0:1], v0, v[2:3] +; SI-NEXT: ds_max_rtn_f64 v[0:1], v4, v[2:3] ; SI-NEXT: s_add_i32 s1, s0, 4 -; SI-NEXT: v_mov_b32_e32 v2, s1 +; SI-NEXT: v_mov_b32_e32 v3, s1 +; SI-NEXT: v_mov_b32_e32 v2, s0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v1, s0 -; SI-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; SI-NEXT: buffer_store_dword v1, v3, s[4:7], 0 offen +; SI-NEXT: buffer_store_dword v0, v2, s[4:7], 0 offen ; SI-NEXT: s_endpgm ; ; GFX7-LABEL: lds_ds_fmax_f64: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX7-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX7-NEXT: s_mov_b32 s10, -1 -; GFX7-NEXT: s_load_dword s4, s[0:1], 0xb +; GFX7-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 +; GFX7-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0xb ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX7-NEXT: s_mov_b32 s11, 0xe8f000 -; GFX7-NEXT: s_add_u32 s8, s8, s3 -; GFX7-NEXT: s_mov_b32 s2, 0 -; GFX7-NEXT: s_mov_b32 s3, 0x40450000 -; GFX7-NEXT: v_mov_b32_e32 v0, s2 -; GFX7-NEXT: s_addc_u32 s9, s9, 0 -; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: s_mov_b32 s7, 0xe8f000 +; GFX7-NEXT: s_add_u32 s4, s4, s3 +; GFX7-NEXT: s_addc_u32 s5, s5, 0 +; GFX7-NEXT: v_mov_b32_e32 v0, 0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshl_b32 s2, s4, 3 -; GFX7-NEXT: v_mov_b32_e32 v2, s2 +; GFX7-NEXT: s_lshl_b32 s3, s2, 3 +; GFX7-NEXT: v_mov_b32_e32 v1, 0x40450000 +; GFX7-NEXT: v_mov_b32_e32 v2, s3 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: ds_max_rtn_f64 
v[2:3], v2, v[0:1] offset:32 -; GFX7-NEXT: s_lshl_b32 s2, s4, 4 -; GFX7-NEXT: v_mov_b32_e32 v4, s2 -; GFX7-NEXT: ds_max_f64 v4, v[0:1] offset:64 -; GFX7-NEXT: v_mov_b32_e32 v0, s1 +; GFX7-NEXT: s_lshl_b32 s2, s2, 4 +; GFX7-NEXT: v_mov_b32_e32 v5, s2 +; GFX7-NEXT: v_mov_b32_e32 v4, s1 +; GFX7-NEXT: ds_max_f64 v5, v[0:1] offset:64 ; GFX7-NEXT: s_waitcnt lgkmcnt(1) -; GFX7-NEXT: ds_max_rtn_f64 v[0:1], v0, v[2:3] +; GFX7-NEXT: ds_max_rtn_f64 v[0:1], v4, v[2:3] ; GFX7-NEXT: s_add_i32 s1, s0, 4 ; GFX7-NEXT: v_mov_b32_e32 v3, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: buffer_store_dword v1, v3, s[8:11], 0 offen -; GFX7-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen +; GFX7-NEXT: buffer_store_dword v1, v3, s[4:7], 0 offen +; GFX7-NEXT: buffer_store_dword v0, v2, s[4:7], 0 offen ; GFX7-NEXT: s_endpgm ; ; VI-LABEL: lds_ds_fmax_f64: ; VI: ; %bb.0: ; VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s90, -1 -; VI-NEXT: s_load_dword s4, s[0:1], 0x2c +; VI-NEXT: s_load_dword s2, s[0:1], 0x2c ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s90, -1 ; VI-NEXT: s_mov_b32 s91, 0xe80000 ; VI-NEXT: s_add_u32 s88, s88, s3 -; VI-NEXT: s_mov_b32 s2, 0 -; VI-NEXT: s_mov_b32 s3, 0x40450000 -; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: s_addc_u32 s89, s89, 0 -; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_lshl_b32 s2, s4, 3 -; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: s_lshl_b32 s3, s2, 3 +; VI-NEXT: v_mov_b32_e32 v1, 0x40450000 +; VI-NEXT: v_mov_b32_e32 v2, s3 ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] offset:32 -; VI-NEXT: s_lshl_b32 s2, s4, 4 -; VI-NEXT: v_mov_b32_e32 v4, s2 -; VI-NEXT: ds_max_f64 v4, v[0:1] offset:64 -; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: s_lshl_b32 s2, s2, 4 +; VI-NEXT: v_mov_b32_e32 v5, s2 +; VI-NEXT: v_mov_b32_e32 v4, s1 +; VI-NEXT: ds_max_f64 v5, v[0:1] offset:64 ; VI-NEXT: s_waitcnt lgkmcnt(1) -; VI-NEXT: ds_max_rtn_f64 v[0:1], v0, v[2:3] +; VI-NEXT: ds_max_rtn_f64 v[0:1], v4, v[2:3] ; VI-NEXT: s_add_i32 s1, s0, 4 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 @@ -1169,11 +1149,9 @@ define amdgpu_kernel void @lds_ds_fmax_f64(ptr addrspace(5) %out, ptr addrspace( ; GFX9-NEXT: s_add_u32 s8, s8, s3 ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s0, 0 -; GFX9-NEXT: s_mov_b32 s1, 0x40450000 -; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: s_addc_u32 s9, s9, 0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40450000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s0, s4, 3 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 @@ -1200,17 +1178,15 @@ define amdgpu_kernel void @lds_ds_fmax_f64(ptr addrspace(5) %out, ptr addrspace( ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c ; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; GFX10-NEXT: s_mov_b32 s0, 0 ; GFX10-NEXT: s_addc_u32 s9, s9, 0 -; GFX10-NEXT: s_mov_b32 s1, 0x40450000 -; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: v_mov_b32_e32 v1, 0x40450000 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshl_b32 s5, s4, 3 +; GFX10-NEXT: s_lshl_b32 s0, s4, 3 +; GFX10-NEXT: v_mov_b32_e32 v5, s3 +; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: s_lshl_b32 s0, s4, 4 -; GFX10-NEXT: v_mov_b32_e32 v2, s5 ; 
GFX10-NEXT: v_mov_b32_e32 v4, s0 -; GFX10-NEXT: v_mov_b32_e32 v5, s3 ; GFX10-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] offset:32 ; GFX10-NEXT: ds_max_f64 v4, v[0:1] offset:64 ; GFX10-NEXT: s_waitcnt lgkmcnt(1) @@ -1224,16 +1200,14 @@ define amdgpu_kernel void @lds_ds_fmax_f64(ptr addrspace(5) %out, ptr addrspace( ; GFX11-LABEL: lds_ds_fmax_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x2c +; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s2, 0 -; GFX11-NEXT: s_mov_b32 s3, 0x40450000 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_lshl_b32 s5, s4, 3 -; GFX11-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v2, s5 -; GFX11-NEXT: s_lshl_b32 s2, s4, 4 +; GFX11-NEXT: s_lshl_b32 s3, s2, 3 +; GFX11-NEXT: v_mov_b32_e32 v5, s1 +; GFX11-NEXT: v_dual_mov_b32 v1, 0x40450000 :: v_dual_mov_b32 v2, s3 +; GFX11-NEXT: s_lshl_b32 s2, s2, 4 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v4, s2 ; GFX11-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] offset:32 @@ -1256,11 +1230,11 @@ define amdgpu_kernel void @lds_ds_fmax_f64(ptr addrspace(5) %out, ptr addrspace( ; G_SI-NEXT: s_mov_b32 s2, 0 ; G_SI-NEXT: s_addc_u32 s9, s9, 0 ; G_SI-NEXT: s_mov_b32 s3, 0x40450000 -; G_SI-NEXT: v_mov_b32_e32 v0, s2 ; G_SI-NEXT: s_waitcnt lgkmcnt(0) ; G_SI-NEXT: s_add_i32 s4, s4, 4 -; G_SI-NEXT: v_mov_b32_e32 v1, s3 +; G_SI-NEXT: v_mov_b32_e32 v0, s2 ; G_SI-NEXT: s_lshl_b32 s2, s4, 3 +; G_SI-NEXT: v_mov_b32_e32 v1, s3 ; G_SI-NEXT: v_mov_b32_e32 v2, s2 ; G_SI-NEXT: s_mov_b32 m0, -1 ; G_SI-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] @@ -1290,11 +1264,11 @@ define amdgpu_kernel void @lds_ds_fmax_f64(ptr addrspace(5) %out, ptr addrspace( ; G_GFX7-NEXT: s_mov_b32 s2, 0 ; G_GFX7-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX7-NEXT: s_mov_b32 s3, 0x40450000 -; G_GFX7-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX7-NEXT: s_add_i32 s4, s4, 4 -; G_GFX7-NEXT: v_mov_b32_e32 v1, s3 +; G_GFX7-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX7-NEXT: s_lshl_b32 s2, s4, 3 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX7-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX7-NEXT: s_mov_b32 m0, -1 ; G_GFX7-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] @@ -1324,11 +1298,11 @@ define amdgpu_kernel void @lds_ds_fmax_f64(ptr addrspace(5) %out, ptr addrspace( ; G_VI-NEXT: s_mov_b32 s2, 0 ; G_VI-NEXT: s_addc_u32 s89, s89, 0 ; G_VI-NEXT: s_mov_b32 s3, 0x40450000 -; G_VI-NEXT: v_mov_b32_e32 v0, s2 ; G_VI-NEXT: s_waitcnt lgkmcnt(0) ; G_VI-NEXT: s_add_i32 s4, s4, 4 -; G_VI-NEXT: v_mov_b32_e32 v1, s3 +; G_VI-NEXT: v_mov_b32_e32 v0, s2 ; G_VI-NEXT: s_lshl_b32 s2, s4, 3 +; G_VI-NEXT: v_mov_b32_e32 v1, s3 ; G_VI-NEXT: v_mov_b32_e32 v2, s2 ; G_VI-NEXT: s_mov_b32 m0, -1 ; G_VI-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] @@ -1358,11 +1332,11 @@ define amdgpu_kernel void @lds_ds_fmax_f64(ptr addrspace(5) %out, ptr addrspace( ; G_GFX9-NEXT: s_mov_b32 s0, 0 ; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX9-NEXT: s_mov_b32 s1, 0x40450000 -; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: s_add_i32 s4, s4, 4 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 +; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] ; G_GFX9-NEXT: s_lshl_b32 s0, 
s4, 4 @@ -1417,10 +1391,10 @@ define amdgpu_kernel void @lds_ds_fmax_f64(ptr addrspace(5) %out, ptr addrspace( ; G_GFX11-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX11-NEXT: s_add_i32 s4, s2, 4 ; G_GFX11-NEXT: s_mov_b32 s2, 0 -; G_GFX11-NEXT: s_lshl_b32 s5, s4, 3 ; G_GFX11-NEXT: s_mov_b32 s3, 0x40450000 +; G_GFX11-NEXT: s_lshl_b32 s5, s4, 3 ; G_GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v5, s1 -; G_GFX11-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v1, s3 +; G_GFX11-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v2, s5 ; G_GFX11-NEXT: s_lshl_b32 s2, s4, 4 ; G_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; G_GFX11-NEXT: v_mov_b32_e32 v4, s2 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll index 5a950d803e9c5d..af59d62b2e2d05 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11,SDAG-GFX11 %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,SDAG-GFX10 %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-GFX11 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-GFX10 %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11,GISEL-GFX11 %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GISEL-GFX10 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL-GFX11 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL-GFX10 %s declare i32 @llvm.amdgcn.fcmp.f32(float, float, i32) #0 declare i32 @llvm.amdgcn.fcmp.f64(double, double, i32) #0 @@ -962,10 +962,8 @@ define amdgpu_kernel void @v_fcmp_f64_oeq(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_oeq: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_eq_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_eq_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -976,11 +974,9 @@ define amdgpu_kernel void @v_fcmp_f64_oeq(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_oeq: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_eq_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_eq_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -988,11 +984,9 @@ define 
amdgpu_kernel void @v_fcmp_f64_oeq(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_oeq: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_eq_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_eq_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1003,11 +997,9 @@ define amdgpu_kernel void @v_fcmp_f64_oeq(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_oeq: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_eq_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_eq_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1020,10 +1012,8 @@ define amdgpu_kernel void @v_fcmp_f64_one(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_one: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_neq_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1034,11 +1024,9 @@ define amdgpu_kernel void @v_fcmp_f64_one(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_one: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_neq_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1046,11 +1034,9 @@ define amdgpu_kernel void @v_fcmp_f64_one(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_one: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_neq_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1061,11 +1047,9 @@ define amdgpu_kernel void @v_fcmp_f64_one(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_one: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_neq_f64_e64 s2, 
s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1078,10 +1062,8 @@ define amdgpu_kernel void @v_fcmp_f64_ogt(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_ogt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_gt_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_lt_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1092,11 +1074,9 @@ define amdgpu_kernel void @v_fcmp_f64_ogt(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_ogt: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_gt_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_lt_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1104,11 +1084,9 @@ define amdgpu_kernel void @v_fcmp_f64_ogt(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_ogt: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_gt_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_lt_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1119,11 +1097,9 @@ define amdgpu_kernel void @v_fcmp_f64_ogt(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_ogt: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_gt_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_lt_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1136,10 +1112,8 @@ define amdgpu_kernel void @v_fcmp_f64_oge(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_oge: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_ge_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_le_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1150,11 +1124,9 @@ define amdgpu_kernel void @v_fcmp_f64_oge(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_oge: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; 
SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_ge_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_le_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1162,11 +1134,9 @@ define amdgpu_kernel void @v_fcmp_f64_oge(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_oge: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_ge_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_le_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1177,11 +1147,9 @@ define amdgpu_kernel void @v_fcmp_f64_oge(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_oge: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_ge_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_le_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1194,10 +1162,8 @@ define amdgpu_kernel void @v_fcmp_f64_olt(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_olt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_lt_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_gt_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1208,11 +1174,9 @@ define amdgpu_kernel void @v_fcmp_f64_olt(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_olt: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_lt_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_gt_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1220,11 +1184,9 @@ define amdgpu_kernel void @v_fcmp_f64_olt(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_olt: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_lt_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_gt_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1235,11 
+1197,9 @@ define amdgpu_kernel void @v_fcmp_f64_olt(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_olt: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_lt_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_gt_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1252,10 +1212,8 @@ define amdgpu_kernel void @v_fcmp_f64_ole(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_ole: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_le_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_ge_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1266,11 +1224,9 @@ define amdgpu_kernel void @v_fcmp_f64_ole(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_ole: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_le_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_ge_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1278,11 +1234,9 @@ define amdgpu_kernel void @v_fcmp_f64_ole(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_ole: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_le_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_ge_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1293,11 +1247,9 @@ define amdgpu_kernel void @v_fcmp_f64_ole(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_ole: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_le_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_ge_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1310,10 +1262,8 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_ueq: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_nlg_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_nlg_f64_e64 s2, 
0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1324,11 +1274,9 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_ueq: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_nlg_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_nlg_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1336,11 +1284,9 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_ueq: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_nlg_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_nlg_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1351,11 +1297,9 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_ueq: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_nlg_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_nlg_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1368,10 +1312,8 @@ define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_o: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_o_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_o_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1382,11 +1324,9 @@ define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_o: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_o_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_o_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1394,11 +1334,9 @@ define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_o: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; 
GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_o_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_o_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1409,11 +1347,9 @@ define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_o: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_o_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_o_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1426,10 +1362,8 @@ define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_uo: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_u_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1440,11 +1374,9 @@ define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_uo: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_u_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1452,11 +1384,9 @@ define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_uo: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_u_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1467,11 +1397,9 @@ define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_uo: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_u_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_u_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1484,10 +1412,8 @@ define amdgpu_kernel void 
@v_fcmp_f64_une(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_une: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_neq_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1498,11 +1424,9 @@ define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_une: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_neq_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1510,11 +1434,9 @@ define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_une: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_neq_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1525,11 +1447,9 @@ define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_une: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_neq_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1542,10 +1462,8 @@ define amdgpu_kernel void @v_fcmp_f64_ugt(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_ugt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_nle_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_nge_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1556,11 +1474,9 @@ define amdgpu_kernel void @v_fcmp_f64_ugt(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_ugt: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_nle_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_nge_f64_e64 
s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1568,11 +1484,9 @@ define amdgpu_kernel void @v_fcmp_f64_ugt(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_ugt: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_nle_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_nge_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1583,11 +1497,9 @@ define amdgpu_kernel void @v_fcmp_f64_ugt(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_ugt: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_nle_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_nge_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1600,10 +1512,8 @@ define amdgpu_kernel void @v_fcmp_f64_uge(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_uge: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_nlt_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_ngt_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1614,11 +1524,9 @@ define amdgpu_kernel void @v_fcmp_f64_uge(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_uge: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_nlt_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_ngt_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1626,11 +1534,9 @@ define amdgpu_kernel void @v_fcmp_f64_uge(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_uge: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_nlt_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_ngt_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1641,11 +1547,9 @@ define amdgpu_kernel void @v_fcmp_f64_uge(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_uge: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 
0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_nlt_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_ngt_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1658,10 +1562,8 @@ define amdgpu_kernel void @v_fcmp_f64_ult(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_ult: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_nge_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_nle_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1672,11 +1574,9 @@ define amdgpu_kernel void @v_fcmp_f64_ult(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_ult: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_nge_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_nle_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1684,11 +1584,9 @@ define amdgpu_kernel void @v_fcmp_f64_ult(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_ult: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_nge_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_nle_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1699,11 +1597,9 @@ define amdgpu_kernel void @v_fcmp_f64_ult(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_ult: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_nge_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_nle_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1716,10 +1612,8 @@ define amdgpu_kernel void @v_fcmp_f64_ule(ptr addrspace(1) %out, double %src) { ; SDAG-GFX11-LABEL: v_fcmp_f64_ule: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_ngt_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_nlt_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1730,11 +1624,9 @@ define amdgpu_kernel void 
@v_fcmp_f64_ule(ptr addrspace(1) %out, double %src) { ; SDAG-GFX10-LABEL: v_fcmp_f64_ule: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b32 s4, 0 -; SDAG-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_ngt_f64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_nlt_f64_e64 s2, 0x40590000, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1742,11 +1634,9 @@ define amdgpu_kernel void @v_fcmp_f64_ule(ptr addrspace(1) %out, double %src) { ; GISEL-GFX11-LABEL: v_fcmp_f64_ule: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_ngt_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_nlt_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1757,11 +1647,9 @@ define amdgpu_kernel void @v_fcmp_f64_ule(ptr addrspace(1) %out, double %src) { ; GISEL-GFX10-LABEL: v_fcmp_f64_ule: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b32 s4, 0 -; GISEL-GFX10-NEXT: s_mov_b32 s5, 0x40590000 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_ngt_f64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_nlt_f64_e64 s2, 0x40590000, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -2754,7 +2642,3 @@ define amdgpu_kernel void @v_fcmp_f16_ule(ptr addrspace(1) %out, half %src) { } attributes #0 = { nounwind readnone convergent } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; GCN: {{.*}} -; GFX10: {{.*}} -; GFX11: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll index e2bdcfa6bbddc8..8c76df3e041fdf 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll @@ -1046,11 +1046,9 @@ define amdgpu_kernel void @v_fcmp_f64_oeq(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_oeq: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_eq_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_eq_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1059,32 +1057,18 @@ define amdgpu_kernel void @v_fcmp_f64_oeq(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_oeq: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_eq_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_oeq: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_eq_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_oeq: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_oeq: ; VI-SDAG: ; %bb.0: @@ -1103,10 +1087,8 @@ define amdgpu_kernel void @v_fcmp_f64_oeq(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_oeq: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_eq_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1124,11 +1106,9 @@ define amdgpu_kernel void @v_fcmp_f64_one(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_one: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 
; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_neq_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1137,32 +1117,18 @@ define amdgpu_kernel void @v_fcmp_f64_one(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_one: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_one: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_one: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_one: ; VI-SDAG: ; %bb.0: @@ -1181,10 +1147,8 @@ define amdgpu_kernel void @v_fcmp_f64_one(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_one: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1202,11 +1166,9 @@ define amdgpu_kernel void @v_fcmp_f64_ogt(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_ogt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_gt_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_lt_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1215,32 +1177,18 @@ define amdgpu_kernel void @v_fcmp_f64_ogt(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_ogt: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 
s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_gt_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_ogt: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_gt_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_ogt: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_gt_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_ogt: ; VI-SDAG: ; %bb.0: @@ -1259,10 +1207,8 @@ define amdgpu_kernel void @v_fcmp_f64_ogt(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_ogt: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_gt_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1280,11 +1226,9 @@ define amdgpu_kernel void @v_fcmp_f64_oge(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_oge: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_ge_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_le_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1293,32 +1237,18 @@ define amdgpu_kernel void @v_fcmp_f64_oge(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_oge: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_ge_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_oge: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; 
GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_ge_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_oge: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_ge_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_oge: ; VI-SDAG: ; %bb.0: @@ -1337,10 +1267,8 @@ define amdgpu_kernel void @v_fcmp_f64_oge(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_oge: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_ge_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1358,11 +1286,9 @@ define amdgpu_kernel void @v_fcmp_f64_olt(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_olt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_lt_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_gt_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1371,32 +1297,18 @@ define amdgpu_kernel void @v_fcmp_f64_olt(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_olt: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_lt_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_olt: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_lt_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_olt: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: 
v_cmp_lt_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_olt: ; VI-SDAG: ; %bb.0: @@ -1415,10 +1327,8 @@ define amdgpu_kernel void @v_fcmp_f64_olt(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_olt: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1436,11 +1346,9 @@ define amdgpu_kernel void @v_fcmp_f64_ole(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_ole: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_le_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_ge_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1449,32 +1357,18 @@ define amdgpu_kernel void @v_fcmp_f64_ole(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_ole: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_le_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_ole: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_le_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_ole: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_le_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_ole: ; VI-SDAG: ; %bb.0: @@ -1493,10 +1387,8 @@ define amdgpu_kernel void @v_fcmp_f64_ole(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_ole: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; 
VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_le_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1514,11 +1406,9 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_ueq: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_nlg_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_nlg_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1527,32 +1417,18 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_ueq: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_nlg_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_ueq: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_nlg_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_ueq: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlg_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_ueq: ; VI-SDAG: ; %bb.0: @@ -1571,10 +1447,8 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_ueq: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_nlg_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1592,11 +1466,9 @@ define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_o: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], s[4:5] +; 
GFX11-NEXT: v_cmp_o_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1605,32 +1477,18 @@ define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_o: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_o: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_o: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_o: ; VI-SDAG: ; %bb.0: @@ -1649,10 +1507,8 @@ define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_o: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1670,11 +1526,9 @@ define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_uo: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_u_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1683,32 +1537,18 @@ define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_uo: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX9-SDAG-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_uo: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_uo: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_uo: ; VI-SDAG: ; %bb.0: @@ -1727,10 +1567,8 @@ define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_uo: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1748,11 +1586,9 @@ define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_une: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_neq_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1761,32 +1597,18 @@ define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_une: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_une: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] -; 
GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_une: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_une: ; VI-SDAG: ; %bb.0: @@ -1805,10 +1627,8 @@ define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_une: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1826,11 +1646,9 @@ define amdgpu_kernel void @v_fcmp_f64_ugt(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_ugt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_nle_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_nge_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1839,32 +1657,18 @@ define amdgpu_kernel void @v_fcmp_f64_ugt(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_ugt: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_nle_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_ugt: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_nle_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_ugt: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_nle_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 
v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_ugt: ; VI-SDAG: ; %bb.0: @@ -1883,10 +1687,8 @@ define amdgpu_kernel void @v_fcmp_f64_ugt(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_ugt: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_nle_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1904,11 +1706,9 @@ define amdgpu_kernel void @v_fcmp_f64_uge(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_uge: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_nlt_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_ngt_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1917,32 +1717,18 @@ define amdgpu_kernel void @v_fcmp_f64_uge(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_uge: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_nlt_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_uge: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_nlt_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_uge: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_uge: ; VI-SDAG: ; %bb.0: @@ -1961,10 +1747,8 @@ define amdgpu_kernel void @v_fcmp_f64_uge(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_uge: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: 
v_cmp_nlt_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1982,11 +1766,9 @@ define amdgpu_kernel void @v_fcmp_f64_ult(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_ult: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_nge_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_nle_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1995,32 +1777,18 @@ define amdgpu_kernel void @v_fcmp_f64_ult(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_ult: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_nge_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_ult: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_nge_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_ult: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_nge_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_ult: ; VI-SDAG: ; %bb.0: @@ -2039,10 +1807,8 @@ define amdgpu_kernel void @v_fcmp_f64_ult(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_ult: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_nge_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -2060,11 +1826,9 @@ define amdgpu_kernel void @v_fcmp_f64_ule(ptr addrspace(1) %out, double %src) { ; GFX11-LABEL: v_fcmp_f64_ule: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b32 s4, 0 -; GFX11-NEXT: s_mov_b32 s5, 0x40590000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_ngt_f64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_nlt_f64_e64 s[2:3], 0x40590000, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -2073,32 +1837,18 @@ define amdgpu_kernel void @v_fcmp_f64_ule(ptr addrspace(1) %out, double %src) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: v_fcmp_f64_ule: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_ngt_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-SDAG-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] -; GFX9-SDAG-NEXT: s_endpgm -; -; GFX9-GISEL-LABEL: v_fcmp_f64_ule: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cmp_ngt_f64_e64 s[2:3], s[2:3], v[0:1] -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-GISEL-NEXT: s_endpgm +; GFX9-LABEL: v_fcmp_f64_ule: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; VI-SDAG-LABEL: v_fcmp_f64_ule: ; VI-SDAG: ; %bb.0: @@ -2117,10 +1867,8 @@ define amdgpu_kernel void @v_fcmp_f64_ule(ptr addrspace(1) %out, double %src) { ; VI-GISEL-LABEL: v_fcmp_f64_ule: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_mov_b32 s5, 0x40590000 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_ngt_f64_e64 s[2:3], s[2:3], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll index 44d1cfb96146eb..7a492f51cce49f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll @@ -633,9 +633,8 @@ define amdgpu_kernel void @v_icmp_i64_eq(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_i64_eq: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_eq_u64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_eq_u64_e64 s2, 0x64, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -646,10 +645,9 @@ define amdgpu_kernel void @v_icmp_i64_eq(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX10-LABEL: v_icmp_i64_eq: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; 
SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_eq_u64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_eq_u64_e64 s2, 0x64, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -657,10 +655,9 @@ define amdgpu_kernel void @v_icmp_i64_eq(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX11-LABEL: v_icmp_i64_eq: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_eq_u64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_eq_u64_e64 s2, 0x64, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -671,10 +668,9 @@ define amdgpu_kernel void @v_icmp_i64_eq(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX10-LABEL: v_icmp_i64_eq: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_eq_u64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_eq_u64_e64 s2, 0x64, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -687,9 +683,8 @@ define amdgpu_kernel void @v_icmp_i64_ne(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_i64_ne: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_ne_u64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_ne_u64_e64 s2, 0x64, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -700,10 +695,9 @@ define amdgpu_kernel void @v_icmp_i64_ne(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX10-LABEL: v_icmp_i64_ne: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_ne_u64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_ne_u64_e64 s2, 0x64, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -711,10 +705,9 @@ define amdgpu_kernel void @v_icmp_i64_ne(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX11-LABEL: v_icmp_i64_ne: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_ne_u64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_ne_u64_e64 s2, 0x64, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -725,10 +718,9 @@ define amdgpu_kernel void @v_icmp_i64_ne(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX10-LABEL: v_icmp_i64_ne: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_ne_u64_e64 s2, s[2:3], s[4:5] +; 
GISEL-GFX10-NEXT: v_cmp_ne_u64_e64 s2, 0x64, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -741,9 +733,8 @@ define amdgpu_kernel void @v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_u64_ugt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_gt_u64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_lt_u64_e64 s2, 0x64, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -754,10 +745,9 @@ define amdgpu_kernel void @v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX10-LABEL: v_icmp_u64_ugt: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_gt_u64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_lt_u64_e64 s2, 0x64, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -765,10 +755,9 @@ define amdgpu_kernel void @v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX11-LABEL: v_icmp_u64_ugt: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_gt_u64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_lt_u64_e64 s2, 0x64, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -779,10 +768,9 @@ define amdgpu_kernel void @v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX10-LABEL: v_icmp_u64_ugt: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_gt_u64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_lt_u64_e64 s2, 0x64, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -795,9 +783,8 @@ define amdgpu_kernel void @v_icmp_u64_uge(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_u64_uge: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_ge_u64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_le_u64_e64 s2, 0x64, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -808,10 +795,9 @@ define amdgpu_kernel void @v_icmp_u64_uge(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX10-LABEL: v_icmp_u64_uge: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_ge_u64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_le_u64_e64 s2, 0x64, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: 
global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -819,10 +805,9 @@ define amdgpu_kernel void @v_icmp_u64_uge(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX11-LABEL: v_icmp_u64_uge: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_ge_u64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_le_u64_e64 s2, 0x64, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -833,10 +818,9 @@ define amdgpu_kernel void @v_icmp_u64_uge(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX10-LABEL: v_icmp_u64_uge: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_ge_u64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_le_u64_e64 s2, 0x64, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -849,9 +833,8 @@ define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_u64_ult: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_lt_u64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_gt_u64_e64 s2, 0x64, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -862,10 +845,9 @@ define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX10-LABEL: v_icmp_u64_ult: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_lt_u64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_gt_u64_e64 s2, 0x64, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -873,10 +855,9 @@ define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX11-LABEL: v_icmp_u64_ult: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_lt_u64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_gt_u64_e64 s2, 0x64, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -887,10 +868,9 @@ define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX10-LABEL: v_icmp_u64_ult: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_lt_u64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_gt_u64_e64 s2, 0x64, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -903,9 +883,8 @@ 
define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_u64_ule: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_le_u64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_ge_u64_e64 s2, 0x64, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -916,10 +895,9 @@ define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX10-LABEL: v_icmp_u64_ule: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_le_u64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_ge_u64_e64 s2, 0x64, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -927,10 +905,9 @@ define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX11-LABEL: v_icmp_u64_ule: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_le_u64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_ge_u64_e64 s2, 0x64, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -941,10 +918,9 @@ define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX10-LABEL: v_icmp_u64_ule: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_le_u64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_ge_u64_e64 s2, 0x64, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -957,9 +933,8 @@ define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_i64_sgt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_gt_i64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_lt_i64_e64 s2, 0x64, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -970,10 +945,9 @@ define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX10-LABEL: v_icmp_i64_sgt: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_gt_i64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_lt_i64_e64 s2, 0x64, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -981,10 +955,9 @@ define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX11-LABEL: v_icmp_i64_sgt: ; 
GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_gt_i64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_lt_i64_e64 s2, 0x64, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -995,10 +968,9 @@ define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX10-LABEL: v_icmp_i64_sgt: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_gt_i64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_lt_i64_e64 s2, 0x64, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1011,9 +983,8 @@ define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_i64_sge: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_ge_i64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_le_i64_e64 s2, 0x64, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1024,10 +995,9 @@ define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX10-LABEL: v_icmp_i64_sge: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_ge_i64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_le_i64_e64 s2, 0x64, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1035,10 +1005,9 @@ define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX11-LABEL: v_icmp_i64_sge: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_ge_i64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_le_i64_e64 s2, 0x64, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1049,10 +1018,9 @@ define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX10-LABEL: v_icmp_i64_sge: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_ge_i64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_le_i64_e64 s2, 0x64, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1065,9 +1033,8 @@ define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_i64_slt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; 
SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_gt_i64_e64 s2, 0x64, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1078,10 +1045,9 @@ define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX10-LABEL: v_icmp_i64_slt: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_gt_i64_e64 s2, 0x64, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1089,10 +1055,9 @@ define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX11-LABEL: v_icmp_i64_slt: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_gt_i64_e64 s2, 0x64, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1103,10 +1068,9 @@ define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX10-LABEL: v_icmp_i64_slt: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_gt_i64_e64 s2, 0x64, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm @@ -1119,9 +1083,8 @@ define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_i64_sle: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; SDAG-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-NEXT: v_cmp_le_i64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX11-NEXT: v_cmp_ge_i64_e64 s2, 0x64, s[2:3] ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -1132,10 +1095,9 @@ define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX10-LABEL: v_icmp_i64_sle: ; SDAG-GFX10: ; %bb.0: ; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX10-NEXT: v_cmp_le_i64_e64 s2, s[2:3], s[4:5] +; SDAG-GFX10-NEXT: v_cmp_ge_i64_e64 s2, 0x64, s[2:3] ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 ; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; SDAG-GFX10-NEXT: s_endpgm @@ -1143,10 +1105,9 @@ define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX11-LABEL: v_icmp_i64_sle: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GISEL-GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX11-NEXT: 
s_waitcnt lgkmcnt(0) -; GISEL-GFX11-NEXT: v_cmp_le_i64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX11-NEXT: v_cmp_ge_i64_e64 s2, 0x64, s[2:3] ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] @@ -1157,10 +1118,9 @@ define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) { ; GISEL-GFX10-LABEL: v_icmp_i64_sle: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX10-NEXT: s_mov_b64 s[4:5], 0x64 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX10-NEXT: v_cmp_le_i64_e64 s2, s[2:3], s[4:5] +; GISEL-GFX10-NEXT: v_cmp_ge_i64_e64 s2, 0x64, s[2:3] ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll index 5d1aa7cbb9992b..80e0202962462c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll @@ -689,10 +689,9 @@ define amdgpu_kernel void @v_icmp_i64_eq(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_i64_eq: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_eq_u64_e64 s[2:3], 0x64, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -715,24 +714,24 @@ define amdgpu_kernel void @v_icmp_i64_eq(ptr addrspace(1) %out, i64 %src) { ; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; SDAG-VI-NEXT: s_endpgm ; -; SDAG-GFX9-LABEL: v_icmp_i64_eq: -; SDAG-GFX9: ; %bb.0: -; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64 -; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX9-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1] -; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3 -; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] -; SDAG-GFX9-NEXT: s_endpgm +; GFX9-LABEL: v_icmp_i64_eq: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x64 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; GISEL-VI-LABEL: v_icmp_i64_eq: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-VI-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1] ; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 @@ -741,20 +740,6 @@ define amdgpu_kernel void @v_icmp_i64_eq(ptr addrspace(1) %out, i64 %src) { ; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm -; -; GISEL-GFX9-LABEL: v_icmp_i64_eq: -; GISEL-GFX9: ; %bb.0: -; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX9-NEXT: 
s_mov_b64 s[4:5], 0x64 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX9-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1] -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32) store i64 %result, ptr addrspace(1) %out ret void @@ -764,10 +749,9 @@ define amdgpu_kernel void @v_icmp_i64_ne(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_i64_ne: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_ne_u64_e64 s[2:3], 0x64, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -790,24 +774,24 @@ define amdgpu_kernel void @v_icmp_i64_ne(ptr addrspace(1) %out, i64 %src) { ; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; SDAG-VI-NEXT: s_endpgm ; -; SDAG-GFX9-LABEL: v_icmp_i64_ne: -; SDAG-GFX9: ; %bb.0: -; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64 -; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX9-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1] -; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3 -; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] -; SDAG-GFX9-NEXT: s_endpgm +; GFX9-LABEL: v_icmp_i64_ne: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x64 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; GISEL-VI-LABEL: v_icmp_i64_ne: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-VI-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1] ; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 @@ -816,20 +800,6 @@ define amdgpu_kernel void @v_icmp_i64_ne(ptr addrspace(1) %out, i64 %src) { ; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm -; -; GISEL-GFX9-LABEL: v_icmp_i64_ne: -; GISEL-GFX9: ; %bb.0: -; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX9-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1] -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33) store i64 %result, ptr addrspace(1) %out ret void @@ -839,10 +809,9 @@ define amdgpu_kernel void 
@v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_u64_ugt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_lt_u64_e64 s[2:3], 0x64, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -865,24 +834,24 @@ define amdgpu_kernel void @v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) { ; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; SDAG-VI-NEXT: s_endpgm ; -; SDAG-GFX9-LABEL: v_icmp_u64_ugt: -; SDAG-GFX9: ; %bb.0: -; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64 -; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX9-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1] -; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3 -; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] -; SDAG-GFX9-NEXT: s_endpgm +; GFX9-LABEL: v_icmp_u64_ugt: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x64 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; GISEL-VI-LABEL: v_icmp_u64_ugt: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-VI-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1] ; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 @@ -891,20 +860,6 @@ define amdgpu_kernel void @v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) { ; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm -; -; GISEL-GFX9-LABEL: v_icmp_u64_ugt: -; GISEL-GFX9: ; %bb.0: -; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX9-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1] -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34) store i64 %result, ptr addrspace(1) %out ret void @@ -914,10 +869,9 @@ define amdgpu_kernel void @v_icmp_u64_uge(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_u64_uge: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_le_u64_e64 s[2:3], 0x64, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -940,24 +894,24 @@ define amdgpu_kernel void @v_icmp_u64_uge(ptr 
addrspace(1) %out, i64 %src) { ; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; SDAG-VI-NEXT: s_endpgm ; -; SDAG-GFX9-LABEL: v_icmp_u64_uge: -; SDAG-GFX9: ; %bb.0: -; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64 -; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX9-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1] -; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3 -; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] -; SDAG-GFX9-NEXT: s_endpgm +; GFX9-LABEL: v_icmp_u64_uge: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x64 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; GISEL-VI-LABEL: v_icmp_u64_uge: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-VI-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1] ; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 @@ -966,20 +920,6 @@ define amdgpu_kernel void @v_icmp_u64_uge(ptr addrspace(1) %out, i64 %src) { ; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm -; -; GISEL-GFX9-LABEL: v_icmp_u64_uge: -; GISEL-GFX9: ; %bb.0: -; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX9-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1] -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35) store i64 %result, ptr addrspace(1) %out ret void @@ -989,10 +929,9 @@ define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_u64_ult: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_gt_u64_e64 s[2:3], 0x64, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1015,24 +954,24 @@ define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) { ; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; SDAG-VI-NEXT: s_endpgm ; -; SDAG-GFX9-LABEL: v_icmp_u64_ult: -; SDAG-GFX9: ; %bb.0: -; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64 -; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX9-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1] -; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3 -; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] -; SDAG-GFX9-NEXT: s_endpgm +; GFX9-LABEL: 
v_icmp_u64_ult: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x64 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; GISEL-VI-LABEL: v_icmp_u64_ult: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-VI-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1] ; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 @@ -1041,20 +980,6 @@ define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) { ; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm -; -; GISEL-GFX9-LABEL: v_icmp_u64_ult: -; GISEL-GFX9: ; %bb.0: -; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX9-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1] -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36) store i64 %result, ptr addrspace(1) %out ret void @@ -1064,10 +989,9 @@ define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_u64_ule: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_ge_u64_e64 s[2:3], 0x64, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1090,24 +1014,24 @@ define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) { ; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; SDAG-VI-NEXT: s_endpgm ; -; SDAG-GFX9-LABEL: v_icmp_u64_ule: -; SDAG-GFX9: ; %bb.0: -; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64 -; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX9-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1] -; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3 -; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] -; SDAG-GFX9-NEXT: s_endpgm +; GFX9-LABEL: v_icmp_u64_ule: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x64 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; GISEL-VI-LABEL: v_icmp_u64_ule: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 -; 
GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-VI-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1] ; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 @@ -1116,20 +1040,6 @@ define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) { ; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm -; -; GISEL-GFX9-LABEL: v_icmp_u64_ule: -; GISEL-GFX9: ; %bb.0: -; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX9-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1] -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37) store i64 %result, ptr addrspace(1) %out ret void @@ -1139,10 +1049,9 @@ define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_i64_sgt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_lt_i64_e64 s[2:3], 0x64, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1165,24 +1074,24 @@ define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) { ; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; SDAG-VI-NEXT: s_endpgm ; -; SDAG-GFX9-LABEL: v_icmp_i64_sgt: -; SDAG-GFX9: ; %bb.0: -; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64 -; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX9-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1] -; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3 -; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] -; SDAG-GFX9-NEXT: s_endpgm +; GFX9-LABEL: v_icmp_i64_sgt: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x64 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; GISEL-VI-LABEL: v_icmp_i64_sgt: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-VI-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1] ; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 @@ -1191,20 +1100,6 @@ define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) { ; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm -; -; GISEL-GFX9-LABEL: v_icmp_i64_sgt: -; GISEL-GFX9: ; %bb.0: -; 
GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX9-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1] -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38) store i64 %result, ptr addrspace(1) %out ret void @@ -1214,10 +1109,9 @@ define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_i64_sge: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_le_i64_e64 s[2:3], 0x64, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1240,24 +1134,24 @@ define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) { ; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; SDAG-VI-NEXT: s_endpgm ; -; SDAG-GFX9-LABEL: v_icmp_i64_sge: -; SDAG-GFX9: ; %bb.0: -; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64 -; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX9-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1] -; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3 -; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] -; SDAG-GFX9-NEXT: s_endpgm +; GFX9-LABEL: v_icmp_i64_sge: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x64 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; GISEL-VI-LABEL: v_icmp_i64_sge: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-VI-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1] ; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 @@ -1266,20 +1160,6 @@ define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) { ; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm -; -; GISEL-GFX9-LABEL: v_icmp_i64_sge: -; GISEL-GFX9: ; %bb.0: -; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX9-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1] -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39) store i64 %result, 
ptr addrspace(1) %out ret void @@ -1289,10 +1169,9 @@ define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_i64_slt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_gt_i64_e64 s[2:3], 0x64, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: v_mov_b32_e32 v1, s3 @@ -1315,24 +1194,24 @@ define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) { ; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; SDAG-VI-NEXT: s_endpgm ; -; SDAG-GFX9-LABEL: v_icmp_i64_slt: -; SDAG-GFX9: ; %bb.0: -; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64 -; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX9-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1] -; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3 -; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] -; SDAG-GFX9-NEXT: s_endpgm +; GFX9-LABEL: v_icmp_i64_slt: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x64 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; GISEL-VI-LABEL: v_icmp_i64_slt: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-VI-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1] ; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 @@ -1341,20 +1220,6 @@ define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) { ; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm -; -; GISEL-GFX9-LABEL: v_icmp_i64_slt: -; GISEL-GFX9: ; %bb.0: -; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX9-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1] -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40) store i64 %result, ptr addrspace(1) %out ret void @@ -1364,10 +1229,9 @@ define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_i64_sle: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 -; GFX11-NEXT: s_mov_b64 s[4:5], 0x64 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], s[4:5] +; GFX11-NEXT: v_cmp_ge_i64_e64 s[2:3], 0x64, s[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-NEXT: 
v_mov_b32_e32 v1, s3 @@ -1390,24 +1254,24 @@ define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) { ; SDAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; SDAG-VI-NEXT: s_endpgm ; -; SDAG-GFX9-LABEL: v_icmp_i64_sle: -; SDAG-GFX9: ; %bb.0: -; SDAG-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v0, 0x64 -; SDAG-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX9-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1] -; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, s2 -; SDAG-GFX9-NEXT: v_mov_b32_e32 v3, s3 -; SDAG-GFX9-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] -; SDAG-GFX9-NEXT: s_endpgm +; GFX9-LABEL: v_icmp_i64_sle: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x64 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: s_endpgm ; ; GISEL-VI-LABEL: v_icmp_i64_sle: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-VI-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1] ; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 @@ -1416,20 +1280,6 @@ define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) { ; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm -; -; GISEL-GFX9-LABEL: v_icmp_i64_sle: -; GISEL-GFX9: ; %bb.0: -; GISEL-GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GISEL-GFX9-NEXT: s_mov_b64 s[4:5], 0x64 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GISEL-GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX9-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1] -; GISEL-GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41) store i64 %result, ptr addrspace(1) %out ret void diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll index 2be94f4dbc6501..6e6a3cbdd4887d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll @@ -47,9 +47,7 @@ define amdgpu_kernel void @rsq_f64_constant_4.0(ptr addrspace(1) %out) #1 { } ; FUNC-LABEL: {{^}}rsq_f64_constant_100.0 -; SI-DAG: s_mov_b32 s{{[0-9]+}}, 0x40590000 -; SI-DAG: s_mov_b32 s{{[0-9]+}}, 0{{$}} -; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, 0x40590000 define amdgpu_kernel void @rsq_f64_constant_100.0(ptr addrspace(1) %out) #1 { %rsq = call double @llvm.amdgcn.rsq.f64(double 100.0) #0 store double %rsq, ptr addrspace(1) %out, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll index 684edd27536b54..16b1ccf58cf6a5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll @@ -898,12 +898,13 @@ define { double, i32 } @test_frexp_f64_i32(double %a) { ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX6-SDAG-NEXT: s_mov_b32 s4, 0 ; GFX6-SDAG-NEXT: s_mov_b32 s5, 0x7ff00000 -; GFX6-SDAG-NEXT: v_frexp_exp_i32_f64_e32 v2, v[0:1] ; GFX6-SDAG-NEXT: v_frexp_mant_f64_e32 v[3:4], v[0:1] ; GFX6-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] -; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f64_e32 v0, v[0:1] ; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc +; GFX6-SDAG-NEXT: v_mov_b32_e32 v0, v3 ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: test_frexp_f64_i32: @@ -936,11 +937,11 @@ define { double, i32 } @test_frexp_f64_i32(double %a) { ; GFX6-GISEL-LABEL: test_frexp_f64_i32: ; GFX6-GISEL: ; %bb.0: ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-GISEL-NEXT: s_mov_b32 s4, 0 -; GFX6-GISEL-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0 +; GFX6-GISEL-NEXT: v_mov_b32_e32 v6, 0x7ff00000 ; GFX6-GISEL-NEXT: v_frexp_mant_f64_e32 v[3:4], v[0:1] ; GFX6-GISEL-NEXT: v_frexp_exp_i32_f64_e32 v2, v[0:1] -; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] +; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[5:6] ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc @@ -950,16 +951,16 @@ define { double, i32 } @test_frexp_f64_i32(double %a) { } define double @test_frexp_f64_i32_only_use_fract(double %a) { -; GFX6-LABEL: test_frexp_f64_i32_only_use_fract: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_mov_b32 s4, 0 -; GFX6-NEXT: s_mov_b32 s5, 0x7ff00000 -; GFX6-NEXT: v_frexp_mant_f64_e32 v[2:3], v[0:1] -; GFX6-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX6-NEXT: s_setpc_b64 s[30:31] +; GFX6-SDAG-LABEL: test_frexp_f64_i32_only_use_fract: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX6-SDAG-NEXT: v_frexp_mant_f64_e32 v[2:3], v[0:1] +; GFX6-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: test_frexp_f64_i32_only_use_fract: ; GFX8: ; %bb.0: @@ -978,21 +979,32 @@ define double @test_frexp_f64_i32_only_use_fract(double %a) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_frexp_mant_f64_e32 v[0:1], v[0:1] ; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_f64_i32_only_use_fract: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX6-GISEL-NEXT: v_mov_b32_e32 v3, 0x7ff00000 +; GFX6-GISEL-NEXT: v_frexp_mant_f64_e32 v[4:5], v[0:1] +; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3] +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call { double, i32 } @llvm.frexp.f64.i32(double %a) %result.0 = extractvalue { double, i32 } %result, 0 ret double %result.0 } define i32 @test_frexp_f64_i32_only_use_exp(double %a) { -; GFX6-LABEL: test_frexp_f64_i32_only_use_exp: -; GFX6: ; %bb.0: -; GFX6-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_mov_b32 s4, 0 -; GFX6-NEXT: s_mov_b32 s5, 0x7ff00000 -; GFX6-NEXT: v_frexp_exp_i32_f64_e32 v2, v[0:1] -; GFX6-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] -; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc -; GFX6-NEXT: s_setpc_b64 s[30:31] +; GFX6-SDAG-LABEL: test_frexp_f64_i32_only_use_exp: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f64_e32 v2, v[0:1] +; GFX6-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: test_frexp_f64_i32_only_use_exp: ; GFX8: ; %bb.0: @@ -1011,6 +1023,16 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v0, v[0:1] ; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_f64_i32_only_use_exp: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX6-GISEL-NEXT: v_mov_b32_e32 v3, 0x7ff00000 +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f64_e32 v4, v[0:1] +; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3] +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call { double, i32 } @llvm.frexp.f64.i32(double %a) %result.0 = extractvalue { double, i32 } %result, 1 ret i32 %result.0 diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll index 438b1bfe319a04..2a1488652d887a 100644 --- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll @@ -4558,10 +4558,10 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o ; GFX8-LABEL: constant_zextload_v16i1_to_v16i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX8-NEXT: v_mov_b32_e32 v17, 0 -; GFX8-NEXT: v_mov_b32_e32 v21, 0 -; GFX8-NEXT: v_mov_b32_e32 v19, v17 -; GFX8-NEXT: v_mov_b32_e32 v13, v17 +; GFX8-NEXT: v_mov_b32_e32 v20, 0 +; GFX8-NEXT: v_mov_b32_e32 v19, 0 +; GFX8-NEXT: v_mov_b32_e32 v17, v20 +; GFX8-NEXT: v_mov_b32_e32 v22, v20 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 @@ -4571,61 +4571,62 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o ; GFX8-NEXT: s_add_u32 s4, s0, 0x50 ; GFX8-NEXT: s_addc_u32 s5, s1, 0 ; GFX8-NEXT: v_mov_b32_e32 v0, s4 -; GFX8-NEXT: v_mov_b32_e32 v24, s3 ; GFX8-NEXT: v_mov_b32_e32 v1, s5 -; GFX8-NEXT: v_mov_b32_e32 v23, s2 -; GFX8-NEXT: s_add_u32 s2, s0, 64 -; GFX8-NEXT: s_addc_u32 s3, s1, 0 -; GFX8-NEXT: v_mov_b32_e32 v9, v17 -; GFX8-NEXT: v_mov_b32_e32 v5, v17 -; GFX8-NEXT: v_mov_b32_e32 v22, 0 +; GFX8-NEXT: v_mov_b32_e32 v23, v20 +; GFX8-NEXT: v_mov_b32_e32 v13, v20 +; GFX8-NEXT: v_mov_b32_e32 v9, v20 +; GFX8-NEXT: v_mov_b32_e32 v5, v20 +; GFX8-NEXT: v_mov_b32_e32 v25, 0 ; GFX8-NEXT: v_mov_b32_e32 v15, 0 ; GFX8-NEXT: v_mov_b32_e32 v3, 0 ; GFX8-NEXT: v_mov_b32_e32 v7, 0 ; GFX8-NEXT: v_mov_b32_e32 v11, 0 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 10, v2 -; GFX8-NEXT: v_and_b32_e32 v18, 1, v4 +; GFX8-NEXT: v_and_b32_e32 v16, 1, v4 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 11, v2 ; GFX8-NEXT: v_and_b32_e32 v4, 1, v4 -; GFX8-NEXT: v_and_b32_e32 v20, 0xffff, v4 +; GFX8-NEXT: v_and_b32_e32 v18, 0xffff, v4 +; 
GFX8-NEXT: flat_store_dwordx4 v[0:1], v[16:19] ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 14, v2 -; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[18:21] +; GFX8-NEXT: v_mov_b32_e32 v17, s3 +; GFX8-NEXT: v_mov_b32_e32 v16, s2 +; GFX8-NEXT: s_add_u32 s2, s0, 64 ; GFX8-NEXT: v_mov_b32_e32 v0, 1 -; GFX8-NEXT: v_and_b32_e32 v16, 1, v4 -; GFX8-NEXT: v_lshrrev_b16_e32 v18, 15, v2 -; GFX8-NEXT: flat_store_dwordx4 v[23:24], v[16:19] -; GFX8-NEXT: v_mov_b32_e32 v24, s3 -; GFX8-NEXT: v_and_b32_sdwa v16, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_e32 v19, 1, v4 +; GFX8-NEXT: v_lshrrev_b16_e32 v21, 15, v2 +; GFX8-NEXT: s_addc_u32 s3, s1, 0 +; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[19:22] +; GFX8-NEXT: v_mov_b32_e32 v17, s3 +; GFX8-NEXT: v_and_b32_sdwa v19, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 9, v2 -; GFX8-NEXT: v_mov_b32_e32 v23, s2 +; GFX8-NEXT: v_mov_b32_e32 v16, s2 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: s_add_u32 s2, s0, 0x60 -; GFX8-NEXT: v_mov_b32_e32 v19, 0 -; GFX8-NEXT: v_and_b32_e32 v18, 0xffff, v0 +; GFX8-NEXT: v_mov_b32_e32 v22, 0 +; GFX8-NEXT: v_and_b32_e32 v21, 0xffff, v0 ; GFX8-NEXT: s_addc_u32 s3, s1, 0 -; GFX8-NEXT: flat_store_dwordx4 v[23:24], v[16:19] -; GFX8-NEXT: v_mov_b32_e32 v24, s3 -; GFX8-NEXT: v_mov_b32_e32 v23, s2 +; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[19:22] +; GFX8-NEXT: v_mov_b32_e32 v1, v20 +; GFX8-NEXT: v_mov_b32_e32 v19, s3 +; GFX8-NEXT: v_mov_b32_e32 v18, s2 ; GFX8-NEXT: s_add_u32 s2, s0, 48 ; GFX8-NEXT: s_addc_u32 s3, s1, 0 -; GFX8-NEXT: v_mov_b32_e32 v26, s3 +; GFX8-NEXT: v_mov_b32_e32 v21, s3 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 12, v2 -; GFX8-NEXT: v_mov_b32_e32 v25, s2 +; GFX8-NEXT: v_mov_b32_e32 v20, s2 ; GFX8-NEXT: s_add_u32 s2, s0, 32 -; GFX8-NEXT: v_and_b32_e32 v19, 1, v0 +; GFX8-NEXT: v_and_b32_e32 v22, 1, v0 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 13, v2 -; GFX8-NEXT: v_mov_b32_e32 v20, v17 -; GFX8-NEXT: v_mov_b32_e32 v1, v17 ; GFX8-NEXT: v_mov_b32_e32 v17, s1 ; GFX8-NEXT: s_addc_u32 s3, s1, 0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: v_mov_b32_e32 v16, s0 -; GFX8-NEXT: v_and_b32_e32 v21, 0xffff, v0 +; GFX8-NEXT: v_and_b32_e32 v24, 0xffff, v0 ; GFX8-NEXT: s_add_u32 s0, s0, 16 ; GFX8-NEXT: v_lshrrev_b16_e32 v6, 7, v2 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 6, v2 -; GFX8-NEXT: flat_store_dwordx4 v[23:24], v[19:22] +; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[22:25] ; GFX8-NEXT: s_addc_u32 s1, s1, 0 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 2, v2 ; GFX8-NEXT: v_lshrrev_b16_e32 v8, 4, v2 @@ -4634,23 +4635,23 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o ; GFX8-NEXT: v_lshrrev_b16_e32 v14, 3, v2 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v2 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 -; GFX8-NEXT: v_and_b32_e32 v22, 1, v6 +; GFX8-NEXT: v_and_b32_e32 v24, 1, v6 ; GFX8-NEXT: v_mov_b32_e32 v19, s3 -; GFX8-NEXT: v_mov_b32_e32 v21, s1 +; GFX8-NEXT: v_mov_b32_e32 v23, s1 ; GFX8-NEXT: v_and_b32_e32 v10, 1, v10 ; GFX8-NEXT: v_and_b32_e32 v6, 1, v14 ; GFX8-NEXT: v_and_b32_e32 v2, 1, v2 -; GFX8-NEXT: v_and_b32_e32 v14, 0xffff, v22 +; GFX8-NEXT: v_and_b32_e32 v14, 0xffff, v24 ; GFX8-NEXT: v_mov_b32_e32 v18, s2 -; GFX8-NEXT: v_mov_b32_e32 v20, s0 +; GFX8-NEXT: v_mov_b32_e32 v22, s0 ; GFX8-NEXT: v_and_b32_e32 v4, 1, v4 ; GFX8-NEXT: v_and_b32_e32 v8, 1, v8 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX8-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; GFX8-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX8-NEXT: flat_store_dwordx4 v[25:26], v[12:15] 
+; GFX8-NEXT: flat_store_dwordx4 v[20:21], v[12:15] ; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[8:11] -; GFX8-NEXT: flat_store_dwordx4 v[20:21], v[4:7] +; GFX8-NEXT: flat_store_dwordx4 v[22:23], v[4:7] ; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[0:3] ; GFX8-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll index 25a84e9e787fb6..2ca17b535cba00 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll @@ -3583,190 +3583,191 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 -; GCN-HSA-NEXT: flat_load_dwordx4 v[20:23], v[0:1] -; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x50 -; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 -; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x60 -; GCN-HSA-NEXT: flat_load_dwordx4 v[16:19], v[0:1] -; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 -; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x70 -; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[0:1] -; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 +; GCN-HSA-NEXT: flat_load_dwordx4 v[24:27], v[0:1] ; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 ; GCN-HSA-NEXT: s_add_u32 s6, s2, 32 ; GCN-HSA-NEXT: s_addc_u32 s7, s3, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v27, s7 -; GCN-HSA-NEXT: v_mov_b32_e32 v26, s6 -; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[0:1] -; GCN-HSA-NEXT: flat_load_dwordx4 v[28:31], v[26:27] -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 -; GCN-HSA-NEXT: s_add_u32 s4, s2, 48 -; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 -; GCN-HSA-NEXT: s_add_u32 s2, s2, 64 -; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCN-HSA-NEXT: s_add_u32 s8, s2, 48 +; GCN-HSA-NEXT: s_addc_u32 s9, s3, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v17, s9 +; GCN-HSA-NEXT: s_add_u32 s10, s2, 64 +; GCN-HSA-NEXT: v_mov_b32_e32 v16, s8 +; GCN-HSA-NEXT: flat_load_dwordx4 v[20:23], v[16:17] +; GCN-HSA-NEXT: s_addc_u32 s11, s3, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, s11 +; GCN-HSA-NEXT: s_add_u32 s10, s2, 0x50 +; GCN-HSA-NEXT: s_addc_u32 s11, s3, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 +; GCN-HSA-NEXT: s_add_u32 s10, s2, 0x60 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[4:5] +; GCN-HSA-NEXT: s_addc_u32 s11, s3, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 +; GCN-HSA-NEXT: s_add_u32 s2, s2, 0x70 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 +; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 ; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] -; GCN-HSA-NEXT: v_mov_b32_e32 v25, s5 -; GCN-HSA-NEXT: v_mov_b32_e32 v24, s4 -; GCN-HSA-NEXT: flat_load_dwordx4 v[24:27], v[24:25] +; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 +; GCN-HSA-NEXT: flat_load_dwordx4 v[28:31], v[12:13] +; GCN-HSA-NEXT: v_mov_b32_e32 v13, s5 +; GCN-HSA-NEXT: v_mov_b32_e32 v15, s7 +; GCN-HSA-NEXT: v_mov_b32_e32 v12, s4 +; GCN-HSA-NEXT: v_mov_b32_e32 v14, s6 +; GCN-HSA-NEXT: flat_load_dwordx4 v[16:19], v[12:13] +; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[14:15] ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 
-; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xe0 -; GCN-HSA-NEXT: v_mov_b32_e32 v33, s1 -; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v32, s0 +; GCN-HSA-NEXT: v_mov_b32_e32 v37, s1 +; GCN-HSA-NEXT: v_mov_b32_e32 v36, s0 ; GCN-HSA-NEXT: s_waitcnt vmcnt(7) -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v37, 16, v21 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v35, 16, v20 -; GCN-HSA-NEXT: v_and_b32_e32 v36, 0xffff, v21 -; GCN-HSA-NEXT: v_and_b32_e32 v34, 0xffff, v20 -; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 -; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 -; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 -; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[34:37] +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v35, 16, v25 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v33, 16, v24 +; GCN-HSA-NEXT: v_and_b32_e32 v34, 0xffff, v25 +; GCN-HSA-NEXT: v_and_b32_e32 v32, 0xffff, v24 +; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v37, s5 -; GCN-HSA-NEXT: v_mov_b32_e32 v36, s4 -; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xc0 +; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xf0 +; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[32:35] +; GCN-HSA-NEXT: v_mov_b32_e32 v37, s3 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v36, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 +; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 ; GCN-HSA-NEXT: s_add_u32 s6, s0, 0xd0 ; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 ; GCN-HSA-NEXT: s_add_u32 s8, s0, 0xa0 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v35, 16, v23 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v33, 16, v22 -; GCN-HSA-NEXT: v_and_b32_e32 v34, 0xffff, v23 -; GCN-HSA-NEXT: v_and_b32_e32 v32, 0xffff, v22 ; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 -; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[32:35] +; GCN-HSA-NEXT: s_add_u32 s10, s0, 0xb0 +; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 +; GCN-HSA-NEXT: s_add_u32 s12, s0, 0x80 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v35, 16, v27 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v33, 16, v26 +; GCN-HSA-NEXT: v_and_b32_e32 v34, 0xffff, v27 +; GCN-HSA-NEXT: v_and_b32_e32 v32, 0xffff, v26 +; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 +; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[32:35] +; GCN-HSA-NEXT: s_waitcnt vmcnt(7) +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v1 +; GCN-HSA-NEXT: v_mov_b32_e32 v33, s13 +; GCN-HSA-NEXT: v_mov_b32_e32 v32, s12 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v0 +; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v1 +; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 +; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[24:27] +; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 +; GCN-HSA-NEXT: s_waitcnt vmcnt(7) +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v9 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v8 +; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v9 +; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v8 +; GCN-HSA-NEXT: v_mov_b32_e32 v8, s10 +; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27] +; GCN-HSA-NEXT: v_mov_b32_e32 v9, s11 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v11 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v10 +; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v11 +; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v10 +; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[24:27] ; GCN-HSA-NEXT: s_waitcnt vmcnt(8) -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v23, 16, v17 -; GCN-HSA-NEXT: v_mov_b32_e32 v33, s9 -; GCN-HSA-NEXT: v_mov_b32_e32 v32, s8 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v21, 16, v16 -; GCN-HSA-NEXT: v_and_b32_e32 v22, 0xffff, v17 -; GCN-HSA-NEXT: v_and_b32_e32 v20, 0xffff, v16 -; GCN-HSA-NEXT: 
flat_store_dwordx4 v[32:33], v[20:23] -; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3 -; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2 -; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0 -; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 -; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 -; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v22, 16, v19 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v18 -; GCN-HSA-NEXT: v_and_b32_e32 v21, 0xffff, v19 -; GCN-HSA-NEXT: v_and_b32_e32 v19, 0xffff, v18 -; GCN-HSA-NEXT: v_mov_b32_e32 v35, s5 -; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v34, s4 -; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[19:22] -; GCN-HSA-NEXT: s_waitcnt vmcnt(9) -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v12 -; GCN-HSA-NEXT: v_mov_b32_e32 v21, s7 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v13 -; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v13 -; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v12 -; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3 -; GCN-HSA-NEXT: v_mov_b32_e32 v20, s6 -; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[16:19] -; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v15 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v14 -; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v15 -; GCN-HSA-NEXT: v_and_b32_e32 v15, 0xffff, v14 -; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 -; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[15:18] -; GCN-HSA-NEXT: s_waitcnt vmcnt(10) -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v8 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v15, 16, v9 -; GCN-HSA-NEXT: v_and_b32_e32 v14, 0xffff, v9 -; GCN-HSA-NEXT: v_and_b32_e32 v12, 0xffff, v8 -; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v11 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v10 -; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v11 -; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v10 -; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[12:15] -; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[16:19] -; GCN-HSA-NEXT: s_waitcnt vmcnt(9) -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v5 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v12, 16, v4 -; GCN-HSA-NEXT: v_and_b32_e32 v13, 0xffff, v5 -; GCN-HSA-NEXT: v_and_b32_e32 v11, 0xffff, v4 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v5 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v4 +; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v5 +; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v4 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 -; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 -; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[11:14] -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v7 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v1 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v12, 16, v0 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v6 -; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v7 -; GCN-HSA-NEXT: v_and_b32_e32 v15, 0xffff, v6 -; GCN-HSA-NEXT: v_and_b32_e32 v13, 0xffff, v1 -; GCN-HSA-NEXT: v_and_b32_e32 v11, 0xffff, v0 +; GCN-HSA-NEXT: v_mov_b32_e32 v33, s7 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v7 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v6 +; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v7 +; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v6 +; GCN-HSA-NEXT: v_mov_b32_e32 v32, s6 +; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[8:11] +; GCN-HSA-NEXT: s_waitcnt vmcnt(8) +; GCN-HSA-NEXT: v_and_b32_e32 v7, 0xffff, v28 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v29 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v8, 16, v28 +; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v29 +; 
GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 +; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[24:27] ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v31 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v30 +; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v31 +; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v30 +; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[7:10] +; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27] ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v3 -; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[15:18] -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v30 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v29 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v28 -; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v3 -; GCN-HSA-NEXT: s_waitcnt vmcnt(10) -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v22, 16, v27 -; GCN-HSA-NEXT: v_and_b32_e32 v3, 0xffff, v30 -; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v29 -; GCN-HSA-NEXT: v_and_b32_e32 v15, 0xffff, v28 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v30, 16, v25 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v28, 16, v24 -; GCN-HSA-NEXT: v_and_b32_e32 v21, 0xffff, v27 -; GCN-HSA-NEXT: v_and_b32_e32 v29, 0xffff, v25 -; GCN-HSA-NEXT: v_and_b32_e32 v27, 0xffff, v24 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v3 +; GCN-HSA-NEXT: v_and_b32_e32 v3, 0xffff, v2 +; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[3:6] +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v20 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 -; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[27:30] ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v26 -; GCN-HSA-NEXT: v_and_b32_e32 v19, 0xffff, v26 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v21 +; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v21 +; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v20 +; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 -; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[19:22] +; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v23 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v22 +; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v23 +; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v22 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCN-HSA-NEXT: s_waitcnt vmcnt(12) +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v18 +; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] +; GCN-HSA-NEXT: s_waitcnt vmcnt(12) +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v15 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v17 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v16 +; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v18 +; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v17 +; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v16 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v13 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v12 +; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v15 +; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v13 +; GCN-HSA-NEXT: v_and_b32_e32 v15, 0xffff, v12 +; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 -; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[15:18] ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 
0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v31 -; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v31 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[15:18] +; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 -; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[3:6] +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v14 +; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v14 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[8:11] ; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 -; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[11:14] +; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v8, 16, v2 -; GCN-HSA-NEXT: v_and_b32_e32 v7, 0xffff, v2 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 -; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[7:10] +; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v19 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 +; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v19 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 +; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-HSA-NEXT: s_endpgm ; ; GCN-NOHSA-VI-LABEL: global_zextload_v64i16_to_v64i32: @@ -4383,189 +4384,191 @@ define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(ptr addrspace(1) %ou ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCN-HSA-NEXT: flat_load_dwordx4 v[28:31], v[0:1] ; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x70 -; GCN-HSA-NEXT: flat_load_dwordx4 v[20:23], v[0:1] ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 +; GCN-HSA-NEXT: flat_load_dwordx4 v[20:23], v[0:1] ; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x60 -; GCN-HSA-NEXT: flat_load_dwordx4 v[16:19], v[0:1] ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x50 -; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 -; GCN-HSA-NEXT: s_add_u32 s8, s2, 64 ; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[0:1] +; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 +; GCN-HSA-NEXT: s_add_u32 s4, s2, 64 +; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 -; GCN-HSA-NEXT: s_addc_u32 s9, s3, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 ; GCN-HSA-NEXT: s_add_u32 s4, s2, 48 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 ; GCN-HSA-NEXT: s_add_u32 s6, s2, 32 ; GCN-HSA-NEXT: s_addc_u32 s7, s3, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v25, s7 ; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 -; GCN-HSA-NEXT: v_mov_b32_e32 v24, s6 -; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[0:1] -; GCN-HSA-NEXT: flat_load_dwordx4 v[28:31], v[24:25] -; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 -; GCN-HSA-NEXT: v_mov_b32_e32 v25, s5 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 -; GCN-HSA-NEXT: v_mov_b32_e32 v24, s4 -; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[0:1] -; GCN-HSA-NEXT: flat_load_dwordx4 v[24:27], v[24:25] -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 ; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 +; 
GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 +; GCN-HSA-NEXT: flat_load_dwordx4 v[24:27], v[8:9] +; GCN-HSA-NEXT: v_mov_b32_e32 v9, s5 +; GCN-HSA-NEXT: v_mov_b32_e32 v8, s4 +; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GCN-HSA-NEXT: v_mov_b32_e32 v17, s7 +; GCN-HSA-NEXT: v_mov_b32_e32 v16, s6 +; GCN-HSA-NEXT: flat_load_dwordx4 v[16:19], v[16:17] ; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xe0 ; GCN-HSA-NEXT: v_mov_b32_e32 v37, s1 -; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v36, s0 ; GCN-HSA-NEXT: s_waitcnt vmcnt(7) -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v35, 16, v21 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v33, 16, v20 -; GCN-HSA-NEXT: v_bfe_i32 v34, v21, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v32, v20, 0, 16 -; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 -; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 -; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v35, 16, v29 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v33, 16, v28 +; GCN-HSA-NEXT: v_bfe_i32 v34, v29, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v32, v28, 0, 16 +; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 +; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 ; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[32:35] +; GCN-HSA-NEXT: v_mov_b32_e32 v36, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v35, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v34, 16, v31 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v32, 16, v30 +; GCN-HSA-NEXT: v_bfe_i32 v33, v31, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v31, v30, 0, 16 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v35, 16, v23 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v33, 16, v22 -; GCN-HSA-NEXT: v_bfe_i32 v34, v23, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v32, v22, 0, 16 -; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[32:35] -; GCN-HSA-NEXT: v_mov_b32_e32 v37, s5 +; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[31:34] +; GCN-HSA-NEXT: s_waitcnt vmcnt(8) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v29, 16, v20 ; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3 ; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v31, 16, v21 +; GCN-HSA-NEXT: v_bfe_i32 v30, v21, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v28, v20, 0, 16 +; GCN-HSA-NEXT: flat_store_dwordx4 v[35:36], v[28:31] ; GCN-HSA-NEXT: v_mov_b32_e32 v35, s3 ; GCN-HSA-NEXT: v_mov_b32_e32 v34, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0 -; GCN-HSA-NEXT: v_mov_b32_e32 v36, s4 -; GCN-HSA-NEXT: s_waitcnt vmcnt(8) -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 16, v17 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 16, v16 -; GCN-HSA-NEXT: v_bfe_i32 v22, v17, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v20, v16, 0, 16 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[20:23] ; GCN-HSA-NEXT: v_mov_b32_e32 v37, s3 ; GCN-HSA-NEXT: v_mov_b32_e32 v36, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v22, 16, v19 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v20, 16, v18 -; GCN-HSA-NEXT: v_bfe_i32 v21, v19, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v19, v18, 0, 16 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v31, 16, v23 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v29, 16, v22 +; GCN-HSA-NEXT: v_bfe_i32 v30, v23, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v28, v22, 0, 16 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[19:22] +; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[28:31] ; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3 ; GCN-HSA-NEXT: v_mov_b32_e32 
v32, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: s_waitcnt vmcnt(9) -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 16, v13 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 16, v12 -; GCN-HSA-NEXT: v_bfe_i32 v18, v13, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v16, v12, 0, 16 ; GCN-HSA-NEXT: v_mov_b32_e32 v39, s3 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 16, v15 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 16, v14 -; GCN-HSA-NEXT: v_bfe_i32 v22, v15, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v20, v14, 0, 16 +; GCN-HSA-NEXT: s_waitcnt vmcnt(9) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 16, v13 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 16, v12 +; GCN-HSA-NEXT: v_bfe_i32 v22, v13, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v20, v12, 0, 16 ; GCN-HSA-NEXT: v_mov_b32_e32 v38, s2 -; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[16:19] -; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[20:23] -; GCN-HSA-NEXT: s_waitcnt vmcnt(10) -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 16, v9 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 16, v8 -; GCN-HSA-NEXT: v_bfe_i32 v17, v9, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v15, v8, 0, 16 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 16, v11 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v12, 16, v10 -; GCN-HSA-NEXT: v_bfe_i32 v13, v11, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v11, v10, 0, 16 -; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[15:18] -; GCN-HSA-NEXT: flat_store_dwordx4 v[38:39], v[11:14] -; GCN-HSA-NEXT: s_waitcnt vmcnt(8) -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 16, v1 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 16, v0 -; GCN-HSA-NEXT: v_bfe_i32 v17, v1, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v15, v0, 0, 16 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v31, 16, v15 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v29, 16, v14 +; GCN-HSA-NEXT: v_bfe_i32 v30, v15, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v28, v14, 0, 16 +; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[20:23] +; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[28:31] +; GCN-HSA-NEXT: s_waitcnt vmcnt(10) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v5 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v4 +; GCN-HSA-NEXT: v_bfe_i32 v14, v5, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v12, v4, 0, 16 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 16, v7 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 16, v6 +; GCN-HSA-NEXT: v_bfe_i32 v22, v7, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v20, v6, 0, 16 +; GCN-HSA-NEXT: s_waitcnt vmcnt(9) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v1 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v0 +; GCN-HSA-NEXT: v_bfe_i32 v6, v1, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v4, v0, 0, 16 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 -; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[15:18] +; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[12:15] +; GCN-HSA-NEXT: flat_store_dwordx4 v[38:39], v[20:23] +; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7] ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v7 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 16, v6 -; GCN-HSA-NEXT: v_bfe_i32 v9, v7, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 0, 16 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 16, v5 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v12, 16, v4 -; GCN-HSA-NEXT: v_bfe_i32 v13, v5, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v11, v4, 0, 16 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v6, 16, v3 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v4, 16, v2 -; GCN-HSA-NEXT: v_bfe_i32 v5, v3, 0, 16 -; 
GCN-HSA-NEXT: v_bfe_i32 v3, v2, 0, 16 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 16, v3 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 16, v2 +; GCN-HSA-NEXT: v_bfe_i32 v22, v3, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v20, v2, 0, 16 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[3:6] -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v20, 16, v28 -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[20:23] +; GCN-HSA-NEXT: s_waitcnt vmcnt(11) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v9 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v8 +; GCN-HSA-NEXT: v_bfe_i32 v2, v9, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v0, v8, 0, 16 +; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 -; GCN-HSA-NEXT: v_bfe_i32 v19, v28, 0, 16 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v27 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v26 -; GCN-HSA-NEXT: v_bfe_i32 v2, v27, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v0, v26, 0, 16 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v28, 16, v25 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v26, 16, v24 -; GCN-HSA-NEXT: v_bfe_i32 v27, v25, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v25, v24, 0, 16 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[25:28] -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] +; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 -; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v24 +; GCN-HSA-NEXT: v_bfe_i32 v12, v24, 0, 16 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v24, 16, v11 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v22, 16, v10 +; GCN-HSA-NEXT: v_bfe_i32 v23, v11, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v21, v10, 0, 16 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v22, 16, v29 -; GCN-HSA-NEXT: v_bfe_i32 v21, v29, 0, 16 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[21:24] +; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 -; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[19:22] +; GCN-HSA-NEXT: s_waitcnt vmcnt(12) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v19 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v18 +; GCN-HSA-NEXT: v_bfe_i32 v2, v19, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v0, v18, 0, 16 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v20, 16, v17 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 16, v16 +; GCN-HSA-NEXT: v_bfe_i32 v19, v17, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v17, v16, 0, 16 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 16, v31 -; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 16, v30 -; GCN-HSA-NEXT: v_bfe_i32 v17, v31, 0, 16 -; GCN-HSA-NEXT: v_bfe_i32 v15, v30, 0, 16 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[17:20] +; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 -; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[15:18] +; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v25 +; GCN-HSA-NEXT: v_bfe_i32 v14, v25, 0, 16 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 -; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[11:14] +; 
GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[12:15] ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v27 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v26 +; GCN-HSA-NEXT: v_bfe_i32 v6, v27, 0, 16 +; GCN-HSA-NEXT: v_bfe_i32 v4, v26, 0, 16 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 -; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[7:10] +; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7] ; GCN-HSA-NEXT: s_endpgm ; ; GCN-NOHSA-VI-LABEL: global_sextload_v64i16_to_v64i32: @@ -6526,16 +6529,16 @@ define amdgpu_kernel void @global_zextload_v16i16_to_v16i64(ptr addrspace(1) %ou ; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i64: ; GCN-HSA: ; %bb.0: ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GCN-HSA-NEXT: v_mov_b32_e32 v8, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v17, v8 -; GCN-HSA-NEXT: v_mov_b32_e32 v19, v8 -; GCN-HSA-NEXT: v_mov_b32_e32 v11, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v15, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v17, v15 +; GCN-HSA-NEXT: v_mov_b32_e32 v19, v15 +; GCN-HSA-NEXT: v_mov_b32_e32 v9, 0 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 -; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 ; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 ; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] @@ -6543,65 +6546,65 @@ define amdgpu_kernel void @global_zextload_v16i16_to_v16i64(ptr addrspace(1) %ou ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 16 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v10, s5 -; GCN-HSA-NEXT: v_mov_b32_e32 v9, s4 +; GCN-HSA-NEXT: v_mov_b32_e32 v11, s5 +; GCN-HSA-NEXT: v_mov_b32_e32 v10, s4 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x50 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 -; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 -; GCN-HSA-NEXT: v_mov_b32_e32 v21, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v20, s0 -; GCN-HSA-NEXT: v_mov_b32_e32 v15, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v13, 0 ; GCN-HSA-NEXT: s_waitcnt vmcnt(1) ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v1 ; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v1 -; GCN-HSA-NEXT: flat_store_dwordx4 v[9:10], v[16:19] -; GCN-HSA-NEXT: v_mov_b32_e32 v10, s5 -; GCN-HSA-NEXT: v_mov_b32_e32 v9, s4 +; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[16:19] +; GCN-HSA-NEXT: v_mov_b32_e32 v11, s5 +; GCN-HSA-NEXT: v_mov_b32_e32 v10, s4 +; GCN-HSA-NEXT: s_waitcnt vmcnt(1) +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v5 +; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v5 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x70 +; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[16:19] ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v7 +; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v7 +; GCN-HSA-NEXT: v_mov_b32_e32 v8, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v7, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3 -; GCN-HSA-NEXT: s_waitcnt vmcnt(1) -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v5 -; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v5 -; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2 +; GCN-HSA-NEXT: v_mov_b32_e32 v11, s5 +; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v10, s4 +; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 -; GCN-HSA-NEXT: flat_store_dwordx4 v[9:10], v[16:19] -; GCN-HSA-NEXT: v_mov_b32_e32 v10, s5 +; GCN-HSA-NEXT: 
flat_store_dwordx4 v[10:11], v[16:19] +; GCN-HSA-NEXT: v_mov_b32_e32 v20, s1 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v9, s4 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v7 -; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v7 +; GCN-HSA-NEXT: v_mov_b32_e32 v19, s0 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 0x60 -; GCN-HSA-NEXT: flat_store_dwordx4 v[9:10], v[16:19] -; GCN-HSA-NEXT: v_mov_b32_e32 v10, v8 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v3 -; GCN-HSA-NEXT: v_and_b32_e32 v7, 0xffff, v3 ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 -; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[7:10] +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v3 +; GCN-HSA-NEXT: v_and_b32_e32 v14, 0xffff, v3 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v3, v8 -; GCN-HSA-NEXT: v_mov_b32_e32 v13, v8 -; GCN-HSA-NEXT: v_mov_b32_e32 v9, v8 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v4 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v6 -; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GCN-HSA-NEXT: v_and_b32_e32 v12, 0xffff, v4 -; GCN-HSA-NEXT: v_mov_b32_e32 v7, s3 -; GCN-HSA-NEXT: v_mov_b32_e32 v25, s1 +; GCN-HSA-NEXT: v_mov_b32_e32 v3, v15 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v12, 16, v4 +; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v24, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v26, s1 ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GCN-HSA-NEXT: v_mov_b32_e32 v19, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v6, s2 -; GCN-HSA-NEXT: v_mov_b32_e32 v24, s0 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v0 -; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v0 -; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[2:5] -; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] -; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[12:15] -; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[8:11] +; GCN-HSA-NEXT: v_mov_b32_e32 v18, 0 +; GCN-HSA-NEXT: flat_store_dwordx4 v[7:8], v[14:17] +; GCN-HSA-NEXT: v_mov_b32_e32 v11, v15 +; GCN-HSA-NEXT: v_mov_b32_e32 v16, v15 +; GCN-HSA-NEXT: v_mov_b32_e32 v7, v15 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v8, 16, v6 +; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; GCN-HSA-NEXT: v_mov_b32_e32 v23, s2 +; GCN-HSA-NEXT: v_mov_b32_e32 v25, s0 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v0 +; GCN-HSA-NEXT: v_and_b32_e32 v15, 0xffff, v0 +; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[2:5] +; GCN-HSA-NEXT: flat_store_dwordx4 v[19:20], v[15:18] +; GCN-HSA-NEXT: flat_store_dwordx4 v[23:24], v[10:13] +; GCN-HSA-NEXT: flat_store_dwordx4 v[25:26], v[6:9] ; GCN-HSA-NEXT: s_endpgm ; ; GCN-NOHSA-VI-LABEL: global_zextload_v16i16_to_v16i64: @@ -7358,26 +7361,27 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou ; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i64: ; GCN-HSA: ; %bb.0: ; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GCN-HSA-NEXT: v_mov_b32_e32 v26, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v29, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v25, 0 ; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) ; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 -; GCN-HSA-NEXT: flat_load_dwordx4 v[2:5], v[0:1] +; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 ; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 -; GCN-HSA-NEXT: flat_load_dwordx4 v[6:9], v[0:1] -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 +; 
GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 +; GCN-HSA-NEXT: flat_load_dwordx4 v[5:8], v[4:5] +; GCN-HSA-NEXT: v_mov_b32_e32 v10, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v9, s2 ; GCN-HSA-NEXT: s_add_u32 s2, s2, 48 -; GCN-HSA-NEXT: flat_load_dwordx4 v[10:13], v[0:1] ; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 -; GCN-HSA-NEXT: flat_load_dwordx4 v[14:17], v[0:1] +; GCN-HSA-NEXT: flat_load_dwordx4 v[9:12], v[9:10] +; GCN-HSA-NEXT: v_mov_b32_e32 v14, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v13, s2 +; GCN-HSA-NEXT: flat_load_dwordx4 v[13:16], v[13:14] ; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 ; GCN-HSA-NEXT: s_add_u32 s4, s0, 16 @@ -7392,124 +7396,123 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou ; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 ; GCN-HSA-NEXT: s_add_u32 s14, s0, 0x70 ; GCN-HSA-NEXT: s_addc_u32 s15, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v23, s15 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v22, s14 +; GCN-HSA-NEXT: v_mov_b32_e32 v22, s15 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v21, s14 ; GCN-HSA-NEXT: s_add_u32 s14, s0, 0x50 -; GCN-HSA-NEXT: v_mov_b32_e32 v19, v1 -; GCN-HSA-NEXT: v_mov_b32_e32 v21, v1 +; GCN-HSA-NEXT: v_mov_b32_e32 v18, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v20, v4 ; GCN-HSA-NEXT: s_addc_u32 s15, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v24, v1 +; GCN-HSA-NEXT: v_mov_b32_e32 v27, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v23, v4 ; GCN-HSA-NEXT: s_waitcnt vmcnt(3) -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v5 -; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v5 -; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21] -; GCN-HSA-NEXT: v_mov_b32_e32 v23, s15 -; GCN-HSA-NEXT: v_mov_b32_e32 v22, s14 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v3 -; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v3 -; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21] -; GCN-HSA-NEXT: v_mov_b32_e32 v23, s11 -; GCN-HSA-NEXT: v_mov_b32_e32 v22, s10 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v3 +; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v3 +; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[17:20] +; GCN-HSA-NEXT: v_mov_b32_e32 v22, s15 +; GCN-HSA-NEXT: v_mov_b32_e32 v21, s14 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v1 +; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v1 +; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[17:20] +; GCN-HSA-NEXT: v_mov_b32_e32 v22, s11 +; GCN-HSA-NEXT: v_mov_b32_e32 v21, s10 ; GCN-HSA-NEXT: s_waitcnt vmcnt(4) -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v9 -; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v9 -; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21] -; GCN-HSA-NEXT: v_mov_b32_e32 v23, s13 -; GCN-HSA-NEXT: v_mov_b32_e32 v22, s12 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v7 -; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v7 -; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21] -; GCN-HSA-NEXT: v_mov_b32_e32 v23, s5 -; GCN-HSA-NEXT: v_mov_b32_e32 v22, s4 -; GCN-HSA-NEXT: s_waitcnt vmcnt(5) -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v11 -; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v11 -; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[18:21] -; GCN-HSA-NEXT: v_mov_b32_e32 v22, s7 -; GCN-HSA-NEXT: v_mov_b32_e32 v18, v1 -; GCN-HSA-NEXT: v_mov_b32_e32 v20, v1 -; GCN-HSA-NEXT: v_mov_b32_e32 v21, s6 -; GCN-HSA-NEXT: s_waitcnt vmcnt(5) -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v17 -; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v17 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v8 +; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v8 ; GCN-HSA-NEXT: 
flat_store_dwordx4 v[21:22], v[17:20] -; GCN-HSA-NEXT: v_mov_b32_e32 v22, s9 -; GCN-HSA-NEXT: s_add_u32 s4, s0, 32 -; GCN-HSA-NEXT: v_mov_b32_e32 v21, s8 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v15 -; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v15 -; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v22, s13 +; GCN-HSA-NEXT: v_mov_b32_e32 v21, s12 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v6 +; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v6 ; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[17:20] ; GCN-HSA-NEXT: v_mov_b32_e32 v22, s5 -; GCN-HSA-NEXT: v_mov_b32_e32 v20, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v21, s4 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v12 -; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v12 -; GCN-HSA-NEXT: v_mov_b32_e32 v12, s1 -; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xe0 -; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[17:20] -; GCN-HSA-NEXT: v_mov_b32_e32 v11, s0 -; GCN-HSA-NEXT: v_mov_b32_e32 v20, 0 +; GCN-HSA-NEXT: s_waitcnt vmcnt(5) ; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v10 ; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v10 +; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[17:20] +; GCN-HSA-NEXT: v_mov_b32_e32 v21, s7 +; GCN-HSA-NEXT: v_mov_b32_e32 v17, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v19, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v20, s6 +; GCN-HSA-NEXT: s_waitcnt vmcnt(5) +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v16 +; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v16 +; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] +; GCN-HSA-NEXT: v_mov_b32_e32 v21, s9 +; GCN-HSA-NEXT: s_add_u32 s4, s0, 32 +; GCN-HSA-NEXT: v_mov_b32_e32 v20, s8 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v14 +; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v14 ; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 -; GCN-HSA-NEXT: flat_store_dwordx4 v[11:12], v[17:20] -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v16 -; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v16 -; GCN-HSA-NEXT: v_mov_b32_e32 v16, s5 -; GCN-HSA-NEXT: v_mov_b32_e32 v12, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v10, v1 -; GCN-HSA-NEXT: v_mov_b32_e32 v15, s4 -; GCN-HSA-NEXT: flat_store_dwordx4 v[15:16], v[9:12] -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v14 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v2 -; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v2 -; GCN-HSA-NEXT: v_and_b32_e32 v23, 0xffff, v14 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v13 -; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v13 -; GCN-HSA-NEXT: v_mov_b32_e32 v14, s3 -; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 -; GCN-HSA-NEXT: v_mov_b32_e32 v13, s2 +; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] +; GCN-HSA-NEXT: v_mov_b32_e32 v21, s5 +; GCN-HSA-NEXT: v_mov_b32_e32 v19, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v20, s4 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v11 +; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v11 +; GCN-HSA-NEXT: v_mov_b32_e32 v11, s1 +; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xe0 +; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] +; GCN-HSA-NEXT: v_mov_b32_e32 v10, s0 +; GCN-HSA-NEXT: v_mov_b32_e32 v19, 0 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v9 +; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v9 +; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 +; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[16:19] +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v15 +; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v15 +; GCN-HSA-NEXT: v_mov_b32_e32 v15, s5 +; GCN-HSA-NEXT: v_mov_b32_e32 v11, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v9, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v14, s4 +; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v0 +; GCN-HSA-NEXT: v_and_b32_e32 v14, 0xffff, v0 +; GCN-HSA-NEXT: 
v_mov_b32_e32 v0, s2 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v5 +; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v5 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v12 +; GCN-HSA-NEXT: v_and_b32_e32 v3, 0xffff, v12 +; GCN-HSA-NEXT: v_mov_b32_e32 v6, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 -; GCN-HSA-NEXT: flat_store_dwordx4 v[13:14], v[0:3] +; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[3:6] ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 -; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v28, 16, v13 +; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v13 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 -; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[23:26] +; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[26:29] ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v21, 16, v8 -; GCN-HSA-NEXT: v_and_b32_e32 v19, 0xffff, v8 -; GCN-HSA-NEXT: v_mov_b32_e32 v22, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v20, v1 -; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v24, 16, v7 +; GCN-HSA-NEXT: v_and_b32_e32 v22, 0xffff, v7 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 -; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[19:22] +; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[22:25] ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 -; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCN-HSA-NEXT: v_mov_b32_e32 v21, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v19, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 ; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 +; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[18:21] ; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v6 -; GCN-HSA-NEXT: v_and_b32_e32 v15, 0xffff, v6 -; GCN-HSA-NEXT: v_mov_b32_e32 v18, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v16, v1 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v2 +; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v2 +; GCN-HSA-NEXT: v_mov_b32_e32 v11, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 ; GCN-HSA-NEXT: s_add_u32 s0, s0, 64 -; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[15:18] -; GCN-HSA-NEXT: v_mov_b32_e32 v6, v1 -; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 +; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[8:11] ; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 -; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v4 -; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v4 -; GCN-HSA-NEXT: v_mov_b32_e32 v12, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v8, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v17, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v15, v4 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 -; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[5:8] -; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[9:12] +; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[14:17] ; GCN-HSA-NEXT: s_endpgm ; ; GCN-NOHSA-VI-LABEL: global_zextload_v32i16_to_v32i64: diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll index ea1e784fe58e2e..dcaf7664d5b5aa 100644 --- a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll +++ b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll @@ -45,14 +45,14 @@ define i8 @flat_inst_valu_offset_11bit_max(ptr %p) { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: flat_inst_valu_offset_11bit_max: -; 
GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 -; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: flat_inst_valu_offset_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_inst_valu_offset_11bit_max: ; GFX11: ; %bb.0: @@ -60,18 +60,6 @@ define i8 @flat_inst_valu_offset_11bit_max(ptr %p) { ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: flat_inst_valu_offset_11bit_max: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x7ff -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr i8, ptr %p, i64 2047 %load = load i8, ptr %gep, align 4 ret i8 %load @@ -85,14 +73,14 @@ define i8 @flat_inst_valu_offset_12bit_max(ptr %p) { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: flat_inst_valu_offset_12bit_max: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 -; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: flat_inst_valu_offset_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_inst_valu_offset_12bit_max: ; GFX11: ; %bb.0: @@ -100,18 +88,6 @@ define i8 @flat_inst_valu_offset_12bit_max(ptr %p) { ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: flat_inst_valu_offset_12bit_max: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0xfff -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr i8, ptr %p, i64 4095 %load = load i8, ptr %gep, align 4 ret i8 %load @@ -127,14 +103,14 @@ define i8 @flat_inst_valu_offset_13bit_max(ptr %p) { ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 
; -; GFX10-SDAG-LABEL: flat_inst_valu_offset_13bit_max: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 -; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: flat_inst_valu_offset_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: flat_inst_valu_offset_13bit_max: ; GFX11-SDAG: ; %bb.0: @@ -148,36 +124,17 @@ define i8 @flat_inst_valu_offset_13bit_max(ptr %p) { ; GFX9-GISEL-LABEL: flat_inst_valu_offset_13bit_max: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0 +; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-GISEL-LABEL: flat_inst_valu_offset_13bit_max: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-GISEL-LABEL: flat_inst_valu_offset_13bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x1fff -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 +; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -187,610 +144,320 @@ define i8 @flat_inst_valu_offset_13bit_max(ptr %p) { } define i8 @flat_inst_valu_offset_neg_11bit_max(ptr %p) { -; GFX9-SDAG-LABEL: flat_inst_valu_offset_neg_11bit_max: +; GFX9-LABEL: flat_inst_valu_offset_neg_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_neg_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 
v0, vcc_lo, 0xfffff800, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: flat_inst_valu_offset_neg_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 +; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, ptr %p, i64 -2048 + %load = load i8, ptr %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_neg_12bit_max(ptr %p) { +; GFX9-LABEL: flat_inst_valu_offset_neg_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_neg_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: flat_inst_valu_offset_neg_12bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 +; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, ptr %p, i64 -4096 + %load = load i8, ptr %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_neg_13bit_max(ptr %p) { +; GFX9-LABEL: flat_inst_valu_offset_neg_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_neg_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: flat_inst_valu_offset_neg_13bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 +; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, ptr %p, i64 -8192 + %load = load i8, ptr %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_2x_11bit_max(ptr %p) { +; GFX9-LABEL: flat_inst_valu_offset_2x_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_2x_11bit_max: +; 
GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: flat_inst_valu_offset_2x_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, ptr %p, i64 4095 + %load = load i8, ptr %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_2x_12bit_max(ptr %p) { +; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 -; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: flat_inst_valu_offset_neg_11bit_max: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 -; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo -; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: flat_inst_valu_offset_2x_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-LABEL: flat_inst_valu_offset_neg_11bit_max: +; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 -; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo -; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: flat_inst_valu_offset_neg_11bit_max: +; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xf800 -; GFX9-GISEL-NEXT: s_mov_b32 s5, -1 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0 +; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-GISEL-LABEL: flat_inst_valu_offset_neg_11bit_max: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf800 -; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_neg_11bit_max: +; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xf800 -; GFX11-GISEL-NEXT: s_mov_b32 s1, -1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 +; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i8, ptr %p, i64 -2048 + %gep = getelementptr i8, ptr %p, i64 8191 %load = load i8, ptr %gep, align 4 ret i8 %load } -define i8 @flat_inst_valu_offset_neg_12bit_max(ptr %p) { -; GFX9-SDAG-LABEL: flat_inst_valu_offset_neg_12bit_max: +define i8 @flat_inst_valu_offset_2x_13bit_max(ptr %p) { +; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 -; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0 +; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: flat_inst_valu_offset_neg_12bit_max: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 -; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo -; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: flat_inst_valu_offset_2x_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-LABEL: flat_inst_valu_offset_neg_12bit_max: +; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 -; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo -; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0 +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: flat_inst_valu_offset_neg_12bit_max: +; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xf000 -; GFX9-GISEL-NEXT: s_mov_b32 s5, -1 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x3fff, v0 +; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-GISEL-LABEL: flat_inst_valu_offset_neg_12bit_max: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf000 -; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_neg_12bit_max: +; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xf000 -; GFX11-GISEL-NEXT: s_mov_b32 s1, -1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0 +; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i8, ptr %p, i64 -4096 - %load = load i8, ptr %gep, align 4 - ret i8 %load -} - -define i8 @flat_inst_valu_offset_neg_13bit_max(ptr %p) { -; GFX9-SDAG-LABEL: flat_inst_valu_offset_neg_13bit_max: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 -; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-SDAG-LABEL: flat_inst_valu_offset_neg_13bit_max: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo -; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: flat_inst_valu_offset_neg_13bit_max: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo -; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; 
GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: flat_inst_valu_offset_neg_13bit_max: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xe000 -; GFX9-GISEL-NEXT: s_mov_b32 s5, -1 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: flat_inst_valu_offset_neg_13bit_max: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xe000 -; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_neg_13bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xe000 -; GFX11-GISEL-NEXT: s_mov_b32 s1, -1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i8, ptr %p, i64 -8192 + %gep = getelementptr i8, ptr %p, i64 16383 %load = load i8, ptr %gep, align 4 ret i8 %load } -define i8 @flat_inst_valu_offset_2x_11bit_max(ptr %p) { -; GFX9-LABEL: flat_inst_valu_offset_2x_11bit_max: +define i8 @flat_inst_valu_offset_2x_neg_11bit_max(ptr %p) { +; GFX9-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_11bit_max: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 -; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: flat_inst_valu_offset_2x_11bit_max: +; GFX11-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: 
flat_load_u8 v0, v[0:1] offset:4095 +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 +; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_11bit_max: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0xfff -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i8, ptr %p, i64 4095 - %load = load i8, ptr %gep, align 4 - ret i8 %load -} - -define i8 @flat_inst_valu_offset_2x_12bit_max(ptr %p) { -; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 -; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 -; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x1fff -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: 
v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i8, ptr %p, i64 8191 - %load = load i8, ptr %gep, align 4 - ret i8 %load -} - -define i8 @flat_inst_valu_offset_2x_13bit_max(ptr %p) { -; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0 -; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0 -; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0 -; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], 0x3fff -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x3fff -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x3fff -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i8, ptr %p, i64 16383 - %load = load i8, ptr %gep, align 4 - ret i8 
%load -} - -define i8 @flat_inst_valu_offset_2x_neg_11bit_max(ptr %p) { -; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 -; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 -; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo -; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 -; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo -; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xf000 -; GFX9-GISEL-NEXT: s_mov_b32 s5, -1 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf000 -; GFX10-GISEL-NEXT: s_mov_b32 s5, -1 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1] -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xf000 -; GFX11-GISEL-NEXT: s_mov_b32 s1, -1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr i8, ptr %p, i64 -4096 %load = load i8, ptr %gep, align 4 ret i8 %load } define i8 @flat_inst_valu_offset_2x_neg_12bit_max(ptr %p) { -; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 -; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; 
GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
-; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xe000
-; GFX9-GISEL-NEXT: s_mov_b32 s5, -1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xe000
-; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xe000
-; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX11-NEXT: flat_load_u8 v0, v[0:1]
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i8, ptr %p, i64 -8192
%load = load i8, ptr %gep, align 4
ret i8 %load
}
define i8 @flat_inst_valu_offset_2x_neg_13bit_max(ptr %p) {
-; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
-; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
-; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
-; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xc000
-; GFX9-GISEL-NEXT: s_mov_b32 s5, -1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xc000
-; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xc000
-; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX11-NEXT: flat_load_u8 v0, v[0:1]
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i8, ptr %p, i64 -16384
%load = load i8, ptr %gep, align 4
ret i8 %load
diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
index c9c6c0912bda90..137c83a0fd80c9 100644
--- a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
@@ -72,11 +72,8 @@ define i8 @global_inst_valu_offset_12bit_max(ptr addrspace(1) %p) {
; GFX10-GISEL-LABEL: global_inst_valu_offset_12bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0xfff
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -105,11 +102,8 @@ define i8 @global_inst_valu_offset_13bit_max(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_13bit_max:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -117,11 +111,8 @@ define i8 @global_inst_valu_offset_13bit_max(ptr addrspace(1) %p) {
; GFX10-GISEL-LABEL: global_inst_valu_offset_13bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -129,12 +120,8 @@ define i8 @global_inst_valu_offset_13bit_max(ptr addrspace(1) %p) {
; GFX11-GISEL-LABEL: global_inst_valu_offset_13bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x1fff
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -204,18 +191,14 @@ define i8 @global_inst_valu_offset_neg_12bit_max(ptr addrspace(1) %p) {
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: global_inst_valu_offset_neg_12bit_max:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf000
-; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: global_inst_valu_offset_neg_12bit_max:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX11: ; %bb.0:
@@ -223,87 +206,38 @@ define i8 @global_inst_valu_offset_neg_12bit_max(ptr addrspace(1) %p) {
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: global_inst_valu_offset_neg_12bit_max:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
-; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096
%load = load i8, ptr addrspace(1) %gep, align 4
ret i8 %load
}
define i8 @global_inst_valu_offset_neg_13bit_max(ptr addrspace(1) %p) {
-; GFX9-GISEL-LABEL: global_inst_valu_offset_neg_13bit_max:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xe000
-; GFX9-GISEL-NEXT: s_mov_b32 s5, -1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: global_inst_valu_offset_neg_13bit_max:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xe000
-; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: global_inst_valu_offset_neg_13bit_max:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xe000
-; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: global_inst_valu_offset_neg_13bit_max:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
-; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: global_inst_valu_offset_neg_13bit_max:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
-; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: global_inst_valu_offset_neg_13bit_max:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: global_inst_valu_offset_neg_13bit_max:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
-; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: global_inst_valu_offset_neg_13bit_max:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX11-NEXT: global_load_u8 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192
%load = load i8, ptr addrspace(1) %gep, align 4
ret i8 %load
@@ -320,11 +254,8 @@ define i8 @global_inst_valu_offset_2x_11bit_max(ptr addrspace(1) %p) {
; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0xfff
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -353,11 +284,8 @@ define i8 @global_inst_valu_offset_2x_12bit_max(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -365,11 +293,8 @@ define i8 @global_inst_valu_offset_2x_12bit_max(ptr addrspace(1) %p) {
; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x1fff
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -377,12 +302,8 @@ define i8 @global_inst_valu_offset_2x_12bit_max(ptr addrspace(1) %p) {
; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x1fff
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -422,11 +343,8 @@ define i8 @global_inst_valu_offset_2x_13bit_max(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], 0x3fff
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x3fff, v0
+; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -434,11 +352,8 @@ define i8 @global_inst_valu_offset_2x_13bit_max(ptr addrspace(1) %p) {
; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_mov_b64 s[4:5], 0x3fff
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0
+; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -446,12 +361,8 @@ define i8 @global_inst_valu_offset_2x_13bit_max(ptr addrspace(1) %p) {
; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_mov_b64 s[0:1], 0x3fff
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -495,18 +406,14 @@ define i8 @global_inst_valu_offset_2x_neg_11bit_max(ptr addrspace(1) %p) {
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xf000
-; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX11: ; %bb.0:
@@ -514,159 +421,70 @@ define i8 @global_inst_valu_offset_2x_neg_11bit_max(ptr addrspace(1) %p) {
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
-; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096
%load = load i8, ptr addrspace(1) %gep, align 4
ret i8 %load
}
define i8 @global_inst_valu_offset_2x_neg_12bit_max(ptr addrspace(1) %p) {
-; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xe000
-; GFX9-GISEL-NEXT: s_mov_b32 s5, -1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xe000
-; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xe000
-; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
-; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
-; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
-; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX11-NEXT: global_load_u8 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192
%load = load i8, ptr addrspace(1) %gep, align 4
ret i8 %load
}
define i8 @global_inst_valu_offset_2x_neg_13bit_max(ptr addrspace(1) %p) {
-; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xc000
-; GFX9-GISEL-NEXT: s_mov_b32 s5, -1
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xc000
-; GFX10-GISEL-NEXT: s_mov_b32 s5, -1
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
-; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xc000
-; GFX11-GISEL-NEXT: s_mov_b32 s1, -1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
-; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
-; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
-; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
-; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
-; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
+; GFX11-NEXT: global_load_u8 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr i8, ptr addrspace(1) %p, i64 -16384
%load = load i8, ptr addrspace(1) %gep, align 4
ret i8 %load
diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
index ca79772dbed74c..536b2d054272ee 100644
--- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
+++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
@@ -3065,33 +3065,31 @@ define i64 @v_mul_284_add_82_i64(i64 %arg) {
; GFX7-LABEL: v_mul_284_add_82_i64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v2, v1
-; GFX7-NEXT: v_mov_b32_e32 v3, 0x52
-; GFX7-NEXT: v_mov_b32_e32 v4, 0
-; GFX7-NEXT: s_movk_i32 s6, 0x11c
-; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[3:4]
-; GFX7-NEXT: v_mul_lo_u32 v2, v2, s6
-; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; GFX7-NEXT: s_movk_i32 s4, 0x11c
+; GFX7-NEXT: v_mul_lo_u32 v3, v1, s4
+; GFX7-NEXT: v_mov_b32_e32 v1, 0x52
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[1:2]
+; GFX7-NEXT: v_add_i32_e32 v1, vcc, v3, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_mul_284_add_82_i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v2, v1
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x52
-; GFX8-NEXT: v_mov_b32_e32 v4, 0
-; GFX8-NEXT: s_movk_i32 s6, 0x11c
-; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[3:4]
-; GFX8-NEXT: v_mul_lo_u32 v2, v2, s6
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
+; GFX8-NEXT: s_movk_i32 s4, 0x11c
+; GFX8-NEXT: v_mul_lo_u32 v3, v1, s4
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x52
+; GFX8-NEXT: v_mov_b32_e32 v2, 0
+; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[1:2]
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mul_284_add_82_i64:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v3, 0x52
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: s_movk_i32 s6, 0x11c
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[3:4]
; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, s6, v[1:2]
@@ -3101,8 +3099,8 @@ define i64 @v_mul_284_add_82_i64(i64 %arg) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v4, 0x52
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: s_movk_i32 s6, 0x11c
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[4:5]
; GFX90A-NEXT: v_mov_b32_e32 v4, v1
@@ -3113,9 +3111,9 @@ define i64 @v_mul_284_add_82_i64(i64 %arg) {
; GFX10-LABEL: v_mul_284_add_82_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b64 s[4:5], 0x52
+; GFX10-NEXT: s_movk_i32 s4, 0x11c
; GFX10-NEXT: v_mov_b32_e32 v2, v1
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, 0x11c, v0, s[4:5]
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x52
; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, 0x11c, v2, v[1:2]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = mul i64 %arg, 284
@@ -3139,33 +3137,31 @@ define i64 @v_mul_934584645_add_8234599_i64(i64 %arg) {
; GFX7-LABEL: v_mul_934584645_add_8234599_i64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v2, v1
-; GFX7-NEXT: v_mov_b32_e32 v3, 0x7da667
-; GFX7-NEXT: v_mov_b32_e32 v4, 0
-; GFX7-NEXT: s_mov_b32 s6, 0x37b4a145
-; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[3:4]
-; GFX7-NEXT: v_mul_lo_u32 v2, v2, s6
-; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; GFX7-NEXT: s_mov_b32 s4, 0x37b4a145
+; GFX7-NEXT: v_mul_lo_u32 v3, v1, s4
+; GFX7-NEXT: v_mov_b32_e32 v1, 0x7da667
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[1:2]
+; GFX7-NEXT: v_add_i32_e32 v1, vcc, v3, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_mul_934584645_add_8234599_i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v2, v1
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x7da667
-; GFX8-NEXT: v_mov_b32_e32 v4, 0
-; GFX8-NEXT: s_mov_b32 s6, 0x37b4a145
-; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[3:4]
-; GFX8-NEXT: v_mul_lo_u32 v2, v2, s6
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
+; GFX8-NEXT: s_mov_b32 s4, 0x37b4a145
+; GFX8-NEXT: v_mul_lo_u32 v3, v1, s4
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x7da667
+; GFX8-NEXT: v_mov_b32_e32 v2, 0
+; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s4, v[1:2]
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_mul_934584645_add_8234599_i64:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v3, 0x7da667
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: s_mov_b32 s6, 0x37b4a145
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[3:4]
; GFX900-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, s6, v[1:2]
@@ -3175,8 +3171,8 @@ define i64 @v_mul_934584645_add_8234599_i64(i64 %arg) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v4, 0x7da667
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: s_mov_b32 s6, 0x37b4a145
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[4:5]
; GFX90A-NEXT: v_mov_b32_e32 v4, v1
@@ -3187,9 +3183,9 @@ define i64 @v_mul_934584645_add_8234599_i64(i64 %arg) {
; GFX10-LABEL: v_mul_934584645_add_8234599_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b64 s[4:5], 0x7da667
+; GFX10-NEXT: s_mov_b32 s4, 0x37b4a145
; GFX10-NEXT: v_mov_b32_e32 v2, v1
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, 0x37b4a145, v0, s[4:5]
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x7da667
; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, 0x37b4a145, v2, v[1:2]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = mul i64 %arg, 934584645
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 0654d555766456..3dc565ceed0d0b 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -59,10 +59,8 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
;
; SI-GISEL-LABEL: s_rsq_f64:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_mov_b32 s2, 0
-; SI-GISEL-NEXT: s_brev_b32 s3, 8
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
@@ -145,10 +143,8 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
;
; VI-GISEL-LABEL: s_rsq_f64:
; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_mov_b32 s2, 0
-; VI-GISEL-NEXT: s_brev_b32 s3, 8
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
@@ -243,11 +239,9 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
;
; SI-GISEL-LABEL: s_rsq_f64_fabs:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_mov_b32 s2, 0
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; SI-GISEL-NEXT: s_brev_b32 s3, 8
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, s1
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0
@@ -329,11 +323,9 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
;
; VI-GISEL-LABEL: s_rsq_f64_fabs:
; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_mov_b32 s2, 0
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; VI-GISEL-NEXT: s_brev_b32 s3, 8
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
-; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[2:3]
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |s[0:1]|, v[0:1]
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], |s[0:1]|, v0
@@ -428,10 +420,8 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
;
; SI-GISEL-LABEL: s_neg_rsq_f64:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_mov_b32 s2, 0
-; SI-GISEL-NEXT: s_brev_b32 s3, 8
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
@@ -514,10 +504,8 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
;
; VI-GISEL-LABEL: s_neg_rsq_f64:
; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_mov_b32 s2, 0
-; VI-GISEL-NEXT: s_brev_b32 s3, 8
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
@@ -612,11 +600,9 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
;
; SI-GISEL-LABEL: s_neg_rsq_neg_f64:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_mov_b32 s2, 0
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; SI-GISEL-NEXT: s_brev_b32 s3, 8
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, s1
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -v[0:1], s[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0
@@ -698,11 +684,9 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
;
; VI-GISEL-LABEL: s_neg_rsq_neg_f64:
; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_mov_b32 s2, 0
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; VI-GISEL-NEXT: s_brev_b32 s3, 8
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
-; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -v[0:1], s[2:3]
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -s[0:1], v[0:1]
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], -s[0:1], v0
@@ -797,11 +781,11 @@ define double @v_rsq_f64(double %x) {
; SI-GISEL-LABEL: v_rsq_f64:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
@@ -879,11 +863,11 @@ define double @v_rsq_f64(double %x) {
; VI-GISEL-LABEL: v_rsq_f64:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -966,11 +950,11 @@ define double @v_rsq_f64_fabs(double %x) {
; SI-GISEL-LABEL: v_rsq_f64_fabs:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
@@ -1048,11 +1032,11 @@ define double @v_rsq_f64_fabs(double %x) {
; VI-GISEL-LABEL: v_rsq_f64_fabs:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1136,11 +1120,11 @@ define double @v_rsq_f64_missing_contract0(double %x) {
; SI-GISEL-LABEL: v_rsq_f64_missing_contract0:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
@@ -1218,11 +1202,11 @@ define double @v_rsq_f64_missing_contract0(double %x) {
; VI-GISEL-LABEL: v_rsq_f64_missing_contract0:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1305,11 +1289,11 @@ define double @v_rsq_f64_missing_contract1(double %x) {
; SI-GISEL-LABEL: v_rsq_f64_missing_contract1:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
@@ -1387,11 +1371,11 @@ define double @v_rsq_f64_missing_contract1(double %x) {
; VI-GISEL-LABEL: v_rsq_f64_missing_contract1:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1474,11 +1458,11 @@ define double @v_neg_rsq_f64(double %x) {
; SI-GISEL-LABEL: v_neg_rsq_f64:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
@@ -1556,11 +1540,11 @@ define double @v_neg_rsq_f64(double %x) {
; VI-GISEL-LABEL: v_neg_rsq_f64:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -1680,24 +1664,26 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: s_mov_b32 s4, 0
; SI-GISEL-NEXT: s_brev_b32 s5, 8
; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v10, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; SI-GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
-; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
-; SI-GISEL-NEXT: v_mov_b32_e32 v20, 0x3ff00000
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
+; SI-GISEL-NEXT: v_mov_b32_e32 v20, 0x3ff00000
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -1825,12 +1811,14 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: s_mov_b32 s4, 0
; VI-GISEL-NEXT: s_brev_b32 s5, 8
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -1855,15 +1843,15 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], 1.0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
@@ -1977,24 +1965,26 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: s_mov_b32 s4, 0
; SI-GISEL-NEXT: s_brev_b32 s5, 8
; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v10, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; SI-GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
-; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
-; SI-GISEL-NEXT: v_mov_b32_e32 v20, 0xbff00000
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
+; SI-GISEL-NEXT: v_mov_b32_e32 v20, 0xbff00000
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2122,12 +2112,14 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: s_mov_b32 s4, 0
; VI-GISEL-NEXT: s_brev_b32 s5, 8
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2152,15 +2144,15 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], -1.0
; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], -1.0, v[2:3], -1.0
@@ -2242,15 +2234,17 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
; SI-GISEL-NEXT: s_mov_b32 s4, 0
; SI-GISEL-NEXT: s_brev_b32 s5, 8
; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v10, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; SI-GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
-; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
@@ -2258,7 +2252,7 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2358,12 +2352,14 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: s_mov_b32 s4, 0
; VI-GISEL-NEXT: s_brev_b32 s5, 8
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2388,15 +2384,15 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], s[4:5], v[2:3], s[4:5]
@@ -2511,15 +2507,17 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: s_mov_b32 s4, 0
; SI-GISEL-NEXT: s_brev_b32 s5, 8
; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v10, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; SI-GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
-; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
+; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
@@ -2527,7 +2525,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v10, s[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2657,12 +2655,14 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: s_mov_b32 s4, 0
; VI-GISEL-NEXT: s_brev_b32 s5, 8
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
+; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5]
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
@@ -2687,15 +2687,15 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
@@ -2772,11 +2772,11 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
; SI-GISEL-LABEL: v_rsq_f64_fneg_fabs:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, s[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
@@ -2854,11 +2854,11 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
; VI-GISEL-LABEL: v_rsq_f64_fneg_fabs:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, s[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, v[2:3]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -2943,11 +2943,11 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
; SI-GISEL-LABEL: v_rsq_f64__afn_sqrt:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
@@ -3025,11 +3025,11 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
; VI-GISEL-LABEL: v_rsq_f64__afn_sqrt:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3104,11 +3104,11 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
; SI-GISEL-LABEL: v_rsq_f64__afn_fdiv:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
@@ -3174,11 +3174,11 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
; VI-GISEL-LABEL: v_rsq_f64__afn_fdiv:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3249,11 +3249,11 @@ define double @v_rsq_f64__afn(double %x) {
; SI-GISEL-LABEL: v_rsq_f64__afn:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
@@ -3319,11 +3319,11 @@ define double @v_rsq_f64__afn(double %x) {
; VI-GISEL-LABEL: v_rsq_f64__afn:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
@@ -3395,11 +3395,11 @@ define double @v_neg_rsq_f64__afn(double %x) {
; SI-GISEL-LABEL: v_neg_rsq_f64__afn:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
@@ -3467,11 +3467,11 @@ define double @v_neg_rsq_f64__afn(double %x) {
; VI-GISEL-LABEL: v_neg_rsq_f64__afn:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; VI-GISEL-NEXT: v_mul_f64 v[4:5],
v[2:3], 0.5 @@ -3543,11 +3543,11 @@ define double @v_rsq_f64__afn_ninf(double %x) { ; SI-GISEL-LABEL: v_rsq_f64__afn_ninf: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0 -; SI-GISEL-NEXT: s_brev_b32 s5, 8 -; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] @@ -3613,11 +3613,11 @@ define double @v_rsq_f64__afn_ninf(double %x) { ; VI-GISEL-LABEL: v_rsq_f64__afn_ninf: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_brev_b32 s5, 8 -; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -3688,11 +3688,11 @@ define double @v_rsq_f64__afn_nnan(double %x) { ; SI-GISEL-LABEL: v_rsq_f64__afn_nnan: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0 -; SI-GISEL-NEXT: s_brev_b32 s5, 8 -; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] @@ -3758,11 +3758,11 @@ define double @v_rsq_f64__afn_nnan(double %x) { ; VI-GISEL-LABEL: v_rsq_f64__afn_nnan: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_brev_b32 s5, 8 -; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -3833,11 +3833,11 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) { ; SI-GISEL-LABEL: v_rsq_f64__afn_nnan_ninf: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0 -; SI-GISEL-NEXT: s_brev_b32 s5, 8 -; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 
0 +; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] @@ -3903,11 +3903,11 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) { ; VI-GISEL-LABEL: v_rsq_f64__afn_nnan_ninf: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_brev_b32 s5, 8 -; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -3979,11 +3979,11 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) { ; SI-GISEL-LABEL: v_neg_rsq_f64__afn_nnan_ninf: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0 -; SI-GISEL-NEXT: s_brev_b32 s5, 8 -; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] @@ -4051,11 +4051,11 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) { ; VI-GISEL-LABEL: v_neg_rsq_f64__afn_nnan_ninf: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_brev_b32 s5, 8 -; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -4135,11 +4135,11 @@ define double @v_rsq_f64__nnan_ninf(double %x) { ; SI-GISEL-LABEL: v_rsq_f64__nnan_ninf: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0 -; SI-GISEL-NEXT: s_brev_b32 s5, 8 -; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] @@ -4217,11 +4217,11 @@ define double @v_rsq_f64__nnan_ninf(double %x) { ; 
VI-GISEL-LABEL: v_rsq_f64__nnan_ninf: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_brev_b32 s5, 8 -; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -4325,13 +4325,13 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) { ; SI-GISEL-NEXT: s_mov_b32 s4, 0 ; SI-GISEL-NEXT: s_brev_b32 s5, 8 ; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v10, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x100 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v12, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 -; SI-GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3] +; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0xffffff80 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5 +; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11] ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5 @@ -4339,28 +4339,30 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) { ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5] -; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v10, s[4:5] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v12, s[4:5] ; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v8 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1] ; SI-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3] ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5] -; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13 +; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0xffffff80 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[10:11], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[2:3], v[10:11] -; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260 +; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13 ; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5 -; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13 +; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7] ; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3] -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13 ; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9] -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc ; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3] -; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9] ; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5] ; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6 +; SI-GISEL-NEXT: 
v_cmp_class_f64_e32 vcc, v[2:3], v13 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc ; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[0:1] @@ -4445,12 +4447,14 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) { ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: s_mov_b32 s4, 0 ; VI-GISEL-NEXT: s_brev_b32 s5, 8 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4 +; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5 ; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; VI-GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3] -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5] -; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5 +; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[4:5] +; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7 ; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1] ; VI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3] @@ -4543,10 +4547,8 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 { ; ; SI-GISEL-LABEL: s_rsq_f64_unsafe: ; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_mov_b32 s2, 0 -; SI-GISEL-NEXT: s_brev_b32 s3, 8 -; SI-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; SI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc @@ -4617,10 +4619,8 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 { ; ; VI-GISEL-LABEL: s_rsq_f64_unsafe: ; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_mov_b32 s2, 0 -; VI-GISEL-NEXT: s_brev_b32 s3, 8 -; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; VI-GISEL-NEXT: v_bfrev_b32_e32 v1, 8 ; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1] ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc @@ -4703,11 +4703,11 @@ define double @v_rsq_f64_unsafe(double %x) #0 { ; SI-GISEL-LABEL: v_rsq_f64_unsafe: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0 -; SI-GISEL-NEXT: s_brev_b32 s5, 8 -; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] @@ -4773,11 +4773,11 @@ define double @v_rsq_f64_unsafe(double %x) #0 { ; VI-GISEL-LABEL: v_rsq_f64_unsafe: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_brev_b32 s5, 8 -; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; VI-GISEL-NEXT: v_mov_b32_e32 
v4, 0x100 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 @@ -5109,14 +5109,15 @@ define double @v_div_contract_sqrt_f64(double %x, double %y) { ; SI-GISEL-LABEL: v_div_contract_sqrt_f64: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0 -; SI-GISEL-NEXT: s_brev_b32 s5, 8 -; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3] -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8 +; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xffffff80 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3] +; SI-GISEL-NEXT: v_mov_b32_e32 v11, 0x260 ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], v[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5 @@ -5128,8 +5129,7 @@ define double @v_div_contract_sqrt_f64(double %x, double %y) { ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5] ; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v10, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6 -; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x260 -; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v6 +; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v11 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc ; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; SI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1] @@ -5190,11 +5190,11 @@ define double @v_div_contract_sqrt_f64(double %x, double %y) { ; VI-GISEL-LABEL: v_div_contract_sqrt_f64: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_brev_b32 s5, 8 -; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3] -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8 +; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc ; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3] ; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5 @@ -5276,14 +5276,15 @@ define double @v_div_arcp_sqrt_f64(double %x, double %y) { ; SI-GISEL-LABEL: v_div_arcp_sqrt_f64: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0 -; SI-GISEL-NEXT: s_brev_b32 s5, 8 -; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3] -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8 +; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xffffff80 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3] +; SI-GISEL-NEXT: v_mov_b32_e32 v11, 0x260 ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], v[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5 @@ -5295,8 +5296,7 @@ 
define double @v_div_arcp_sqrt_f64(double %x, double %y) { ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5] ; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v10, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6 -; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x260 -; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v6 +; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v11 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc ; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; SI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1] @@ -5357,11 +5357,11 @@ define double @v_div_arcp_sqrt_f64(double %x, double %y) { ; VI-GISEL-LABEL: v_div_arcp_sqrt_f64: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_brev_b32 s5, 8 -; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3] -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8 +; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc ; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3] ; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5 @@ -5443,14 +5443,15 @@ define double @v_div_contract_arcp_sqrt_f64(double %x, double %y) { ; SI-GISEL-LABEL: v_div_contract_arcp_sqrt_f64: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s4, 0 -; SI-GISEL-NEXT: s_brev_b32 s5, 8 -; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3] -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8 +; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5] +; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xffffff80 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3] +; SI-GISEL-NEXT: v_mov_b32_e32 v11, 0x260 ; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], v[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5 @@ -5462,8 +5463,7 @@ define double @v_div_contract_arcp_sqrt_f64(double %x, double %y) { ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5] ; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v10, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6 -; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x260 -; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v6 +; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v11 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc ; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; SI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1] @@ -5524,11 +5524,11 @@ define double @v_div_contract_arcp_sqrt_f64(double %x, double %y) { ; VI-GISEL-LABEL: v_div_contract_arcp_sqrt_f64: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_brev_b32 s5, 8 -; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3] -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8 +; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[2:3], v[4:5] +; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x100 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, 
vcc ; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[2:3] ; VI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5 @@ -5568,16 +5568,18 @@ define double @v_div_const_contract_sqrt_f64(double %x) { ; SI-SDAG-LABEL: v_div_const_contract_sqrt_f64: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_brev_b32 s7, 8 -; SI-SDAG-NEXT: s_mov_b32 s6, 0 -; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[0:1] +; SI-SDAG-NEXT: s_mov_b32 s4, 0 +; SI-SDAG-NEXT: s_brev_b32 s5, 8 +; SI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] ; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-SDAG-NEXT: v_mov_b32_e32 v9, 0x260 ; SI-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] +; SI-SDAG-NEXT: s_mov_b32 s6, 0 ; SI-SDAG-NEXT: s_mov_b32 s7, 0x40700000 +; SI-SDAG-NEXT: s_mov_b32 s8, 0x40700000 ; SI-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3] ; SI-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5 ; SI-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5 @@ -5600,7 +5602,7 @@ define double @v_div_const_contract_sqrt_f64(double %x) { ; SI-SDAG-NEXT: v_div_scale_f64 v[6:7], s[4:5], s[6:7], v[0:1], s[6:7] ; SI-SDAG-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0 ; SI-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5] -; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s7, v7 +; SI-SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], s8, v7 ; SI-SDAG-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5] ; SI-SDAG-NEXT: s_xor_b64 vcc, s[4:5], vcc ; SI-SDAG-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7] @@ -5611,19 +5613,20 @@ define double @v_div_const_contract_sqrt_f64(double %x) { ; SI-GISEL-LABEL: v_div_const_contract_sqrt_f64: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: s_mov_b32 s6, 0 -; SI-GISEL-NEXT: s_brev_b32 s7, 8 -; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[0:1] -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80 ; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260 +; SI-GISEL-NEXT: s_mov_b32 s6, 0 ; SI-GISEL-NEXT: s_mov_b32 s7, 0x40700000 -; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x40700000 ; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 ; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] +; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x40700000 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5 ; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3] ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -5655,9 +5658,10 @@ define double @v_div_const_contract_sqrt_f64(double %x) { ; VI-SDAG-LABEL: v_div_const_contract_sqrt_f64: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: s_brev_b32 s5, 8 ; VI-SDAG-NEXT: s_mov_b32 s4, 0 +; VI-SDAG-NEXT: s_brev_b32 s5, 8 ; VI-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] +; VI-SDAG-NEXT: s_mov_b32 s4, 0 ; VI-SDAG-NEXT: s_mov_b32 s5, 0x40700000 ; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2 @@ -5695,12 +5699,13 @@ define double @v_div_const_contract_sqrt_f64(double %x) { ; VI-GISEL-LABEL: 
v_div_const_contract_sqrt_f64: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8 +; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x100 ; VI-GISEL-NEXT: s_mov_b32 s4, 0 -; VI-GISEL-NEXT: s_brev_b32 s5, 8 -; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 ; VI-GISEL-NEXT: s_mov_b32 s5, 0x40700000 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; VI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1] ; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5 diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll index 7027521d7e2dce..714b2af1698fe1 100644 --- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -171,10 +171,10 @@ entry: ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x8: ; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}} ; CI-NOHSA-NOT: v_add -; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16 +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16 ; CI-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}} ; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} -; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} +; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} @@ -201,11 +201,11 @@ entry: ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16: -; SI: s_mov_b32 {{s[0-9]+}}, 0x13480 +; SI-DAG: s_mov_b64 s[{{[0-9:]+}}], 0x13480 ; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16 ; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:32 ; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:48 -; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64 +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64 ; CI-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}} ; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} ; CI-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll index 4f2fd3f50494c9..9cb6842ae0a182 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -1592,31 +1592,30 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v6, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[5:6] +; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[5:6] ; GCN-IR-NEXT: v_mov_b32_e32 v7, 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[5:6] ; 
GCN-IR-NEXT: v_cndmask_b32_e64 v7, v7, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GCN-IR-NEXT: v_mov_b32_e32 v3, v2 ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 -; GCN-IR-NEXT: s_mov_b64 s[8:9], 0x8000 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v9, vcc, 1, v5 -; GCN-IR-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v4, s[4:5], 63, v5 +; GCN-IR-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc +; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 ; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[9:10] -; GCN-IR-NEXT: v_lshl_b64 v[4:5], s[8:9], v4 +; GCN-IR-NEXT: v_lshl_b64 v[4:5], s[4:5], v4 ; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, -1, v0 -; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 ; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, -1, v1, vcc ; GCN-IR-NEXT: v_lshr_b64 v[10:11], s[4:5], v9 ; GCN-IR-NEXT: v_sub_i32_e32 v8, vcc, 47, v8 diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll index be0aa394dd99dc..9cd13b3d451509 100644 --- a/llvm/test/CodeGen/AMDGPU/shl.ll +++ b/llvm/test/CodeGen/AMDGPU/shl.ll @@ -1284,18 +1284,15 @@ define amdgpu_kernel void @v_shl_i64_32_bit_constant(ptr addrspace(1) %out, ptr ; VI-LABEL: v_shl_i64_32_bit_constant: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: s_mov_b32 s7, 0xf000 -; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_load_dword s2, s[2:3], 0x0 -; VI-NEXT: s_mov_b32 s4, s0 -; VI-NEXT: s_mov_b32 s5, s1 -; VI-NEXT: s_mov_b64 s[0:1], 0x12d687 +; VI-NEXT: s_load_dword s4, s[2:3], 0x0 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; VI-NEXT: s_lshl_b64 s[4:5], 0x12d687, s4 +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; ; EG-LABEL: v_shl_i64_32_bit_constant: @@ -1875,30 +1872,28 @@ define amdgpu_kernel void @s_shl_inline_imm_neg_4_0_i64(ptr addrspace(1) %out, p define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) { ; SI-LABEL: s_shl_inline_imm_f32_4_0_i64: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s2, s[0:1], 0xd -; SI-NEXT: s_mov_b64 s[0:1], 0x40800000 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_load_dword s4, s[0:1], 0xd +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 -; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: v_mov_b32_e32 v1, s1 -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: s_lshl_b64 s[4:5], 0x40800000, s4 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: v_mov_b32_e32 v1, s5 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: 
s_shl_inline_imm_f32_4_0_i64: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 -; VI-NEXT: s_load_dword s2, s[0:1], 0x34 -; VI-NEXT: s_mov_b64 s[0:1], 0x40800000 -; VI-NEXT: s_mov_b32 s7, 0xf000 -; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_load_dword s4, s[0:1], 0x34 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; VI-NEXT: s_lshl_b64 s[4:5], 0x40800000, s4 +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; ; EG-LABEL: s_shl_inline_imm_f32_4_0_i64: @@ -1931,10 +1926,10 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(ptr addrspace(1) %ou ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-NEXT: s_load_dword s2, s[0:1], 0xd -; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_mov_b32 s0, -4.0 -; SI-NEXT: s_mov_b32 s1, s6 +; SI-NEXT: s_mov_b32 s1, -1 ; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 ; SI-NEXT: v_mov_b32_e32 v0, s0 @@ -1946,10 +1941,10 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(ptr addrspace(1) %ou ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 ; VI-NEXT: s_load_dword s2, s[0:1], 0x34 -; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_mov_b32 s0, -4.0 -; VI-NEXT: s_mov_b32 s1, s6 +; VI-NEXT: s_mov_b32 s1, -1 ; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 ; VI-NEXT: v_mov_b32_e32 v0, s0 diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll index ebd7dfdd0b92a1..bd3a1ca80430f9 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -16,2668 +16,2662 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX6-LABEL: test: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_mov_b32 s44, SCRATCH_RSRC_DWORD0 -; GFX6-NEXT: s_mov_b32 s45, SCRATCH_RSRC_DWORD1 -; GFX6-NEXT: s_mov_b32 s46, -1 -; GFX6-NEXT: s_mov_b32 s47, 0xe8f000 -; GFX6-NEXT: s_add_u32 s44, s44, s3 +; GFX6-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 +; GFX6-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 +; GFX6-NEXT: s_mov_b32 s42, -1 +; GFX6-NEXT: s_mov_b32 s43, 0xe8f000 +; GFX6-NEXT: s_add_u32 s40, s40, s3 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GFX6-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 ; GFX6-NEXT: v_mbcnt_hi_u32_b32_e32 v0, -1, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 13, v0 -; GFX6-NEXT: s_mov_b32 s18, 0 -; GFX6-NEXT: s_mov_b32 s19, 0xf000 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v5 -; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX6-NEXT: s_movk_i32 s4, 0x80 -; GFX6-NEXT: s_mov_b32 s5, s18 -; GFX6-NEXT: s_mov_b64 s[6:7], s[18:19] -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:3968 -; GFX6-NEXT: s_addc_u32 s45, s45, 0 -; GFX6-NEXT: s_movk_i32 s8, 0x100 -; GFX6-NEXT: s_mov_b32 s9, s18 -; GFX6-NEXT: s_mov_b64 s[10:11], s[18:19] -; GFX6-NEXT: s_movk_i32 s12, 0x180 -; GFX6-NEXT: s_mov_b32 s13, s18 -; GFX6-NEXT: s_mov_b64 s[14:15], s[18:19] -; GFX6-NEXT: s_movk_i32 s20, 0x200 -; GFX6-NEXT: s_mov_b32 s21, 
s18 -; GFX6-NEXT: s_mov_b64 s[22:23], s[18:19] -; GFX6-NEXT: s_movk_i32 s24, 0x280 -; GFX6-NEXT: s_mov_b32 s25, s18 -; GFX6-NEXT: s_mov_b64 s[26:27], s[18:19] -; GFX6-NEXT: s_movk_i32 s28, 0x300 -; GFX6-NEXT: s_mov_b32 s29, s18 -; GFX6-NEXT: s_mov_b64 s[30:31], s[18:19] -; GFX6-NEXT: s_movk_i32 s36, 0x380 -; GFX6-NEXT: s_mov_b32 s37, s18 -; GFX6-NEXT: s_mov_b64 s[38:39], s[18:19] -; GFX6-NEXT: s_movk_i32 s40, 0x400 -; GFX6-NEXT: s_mov_b32 s41, s18 -; GFX6-NEXT: s_mov_b64 s[42:43], s[18:19] -; GFX6-NEXT: s_mov_b64 s[16:17], s[2:3] +; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: v_mov_b32_e32 v6, 0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: v_add_i32_e32 v7, vcc, s2, v5 +; GFX6-NEXT: v_mov_b32_e32 v0, s3 +; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3] +; GFX6-NEXT: v_addc_u32_e32 v8, vcc, 0, v0, vcc +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 +; GFX6-NEXT: s_addc_u32 s41, s41, 0 ; GFX6-NEXT: s_mov_b32 s2, 0x3fd00 +; GFX6-NEXT: s_mov_b64 s[8:9], 0x100 +; GFX6-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX6-NEXT: s_mov_b64 s[12:13], 0x180 +; GFX6-NEXT: s_mov_b64 s[14:15], s[6:7] +; GFX6-NEXT: s_mov_b64 s[16:17], 0x200 +; GFX6-NEXT: s_mov_b64 s[18:19], s[6:7] +; GFX6-NEXT: s_mov_b64 s[20:21], 0x280 +; GFX6-NEXT: s_mov_b64 s[22:23], s[6:7] +; GFX6-NEXT: s_mov_b64 s[24:25], 0x300 +; GFX6-NEXT: s_mov_b64 s[26:27], s[6:7] +; GFX6-NEXT: s_mov_b64 s[28:29], 0x380 +; GFX6-NEXT: s_mov_b64 s[30:31], s[6:7] +; GFX6-NEXT: s_mov_b64 s[36:37], 0x400 +; GFX6-NEXT: s_mov_b64 s[38:39], s[6:7] +; GFX6-NEXT: s_mov_b32 s33, 0x4f900 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1268 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:4 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1272 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1276 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1280 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:16 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:3984 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1300 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:20 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1304 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1308 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1312 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:24 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:28 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:32 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4000 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:32 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, 
s[44:47], 0 offset:1332 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:36 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1336 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1340 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1344 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:40 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:44 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:48 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4016 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:48 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1364 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:52 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1368 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1372 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1376 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:56 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:60 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:64 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4032 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:64 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1396 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:68 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1400 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1404 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1408 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:72 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:76 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:80 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4048 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:80 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1428 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:84 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1432 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1436 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1440 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:88 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:92 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:96 ; 4-byte Folded Spill ; GFX6-NEXT: 
s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4064 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:96 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1460 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:100 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1464 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1468 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1472 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:104 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:108 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:112 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4080 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:112 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1492 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:116 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1496 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1500 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1504 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:120 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:124 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:128 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:3968 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:128 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1556 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:132 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1560 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1564 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1568 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:136 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:140 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:144 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:3984 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:144 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1588 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:148 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1592 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1596 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1600 ; 4-byte Folded Spill +; 
GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:152 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:156 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:160 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4000 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:160 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1620 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:164 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1624 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1628 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1632 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:168 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:172 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:176 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4016 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:176 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1652 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:180 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1656 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1660 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1664 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:184 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:188 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:192 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4032 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:192 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1684 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:196 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1688 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1692 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1696 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:200 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:204 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:208 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4048 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:208 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1716 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:212 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt 
vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1720 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1724 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1728 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:216 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:220 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:224 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4064 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:224 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1748 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:228 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1752 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1756 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1760 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:232 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:236 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:240 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4080 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:240 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1780 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:244 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1784 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1788 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1792 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:248 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:252 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:256 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:3968 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:256 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1860 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:260 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1864 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1868 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1872 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:264 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:268 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:272 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:3984 +; GFX6-NEXT: 
buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:272 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1892 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:276 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1896 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1900 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1904 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:280 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:284 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:288 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4000 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:288 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1924 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:292 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1928 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1932 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1936 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:296 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:300 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:304 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4016 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:304 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1956 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:308 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1960 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1964 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1968 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:312 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:316 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:320 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4032 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:320 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1988 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:324 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1992 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1996 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2000 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:328 ; 4-byte Folded Spill +; GFX6-NEXT: 
buffer_store_dword v2, off, s[40:43], 0 offset:332 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:336 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4048 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:336 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2020 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:340 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2024 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2028 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2032 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:344 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:348 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:352 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4064 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:352 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2052 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:356 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2056 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2060 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2064 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:360 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:364 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:368 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4080 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:368 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2084 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:372 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2088 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2092 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2096 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:376 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:380 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:384 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:3968 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:384 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2148 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:388 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2152 ; 4-byte Folded Spill 
-; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2156 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2160 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:392 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:396 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:400 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:3984 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:400 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2180 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:404 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2184 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2188 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2192 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:408 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:412 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:416 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4000 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:416 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2212 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:420 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2216 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2220 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2224 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:424 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:428 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:432 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4016 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:432 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2244 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:436 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2248 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2252 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2256 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:440 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:444 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:448 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4032 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:448 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: 
buffer_store_dword v7, off, s[44:47], 0 offset:2276 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:452 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2280 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2284 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2288 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:456 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:460 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:464 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4048 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:464 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2308 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:468 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2312 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2316 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2320 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:472 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:476 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:480 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4064 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:480 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2340 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:484 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2344 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2348 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2352 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:488 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:492 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:496 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4080 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:496 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2372 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:500 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2376 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2380 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2384 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:504 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:508 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, 
s[40:43], 0 offset:512 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:3968 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:512 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2452 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:516 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2456 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2460 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2464 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:520 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:524 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:528 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:3984 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:528 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2484 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:532 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2488 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2492 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2496 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:536 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:540 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:544 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4000 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:544 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2516 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:548 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2520 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2524 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2528 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:552 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:556 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:560 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4016 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:560 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2548 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:564 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2552 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2556 ; 4-byte Folded Spill -; GFX6-NEXT: 
buffer_store_dword v10, off, s[44:47], 0 offset:2560 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:568 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:572 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:576 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4032 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:576 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2580 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:580 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2584 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2588 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2592 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:584 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:588 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:592 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4048 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:592 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2612 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:596 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2616 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2620 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2624 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:600 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:604 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:608 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4064 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:608 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2644 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:612 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2648 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2652 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2656 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:616 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:620 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:624 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4080 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:624 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2676 ; 4-byte Folded Spill +; GFX6-NEXT: 
buffer_store_dword v0, off, s[40:43], 0 offset:628 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2680 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2684 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2688 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:632 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:636 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:640 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:3968 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:640 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2740 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:644 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2744 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2748 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2752 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:648 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:652 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:656 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:3984 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:656 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2772 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:660 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2776 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2780 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2784 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:664 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:668 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:672 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4000 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:672 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2804 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:676 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2808 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2812 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2816 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:680 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:684 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:688 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: 
buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4016 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:688 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2836 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:692 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2840 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2844 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2848 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:696 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:700 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:704 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4032 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:704 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2868 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:708 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2872 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2876 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2880 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:712 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:716 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:720 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4048 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:720 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2900 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:724 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2904 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2908 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2912 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:728 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:732 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:736 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4064 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:736 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2932 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:740 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2936 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2940 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2944 ; 4-byte Folded Spill +; GFX6-NEXT: 
buffer_store_dword v1, off, s[40:43], 0 offset:744 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:748 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:752 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4080 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:752 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2964 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:756 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2968 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2972 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2976 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:760 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:764 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:768 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:3968 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:768 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3044 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:772 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3048 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3052 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3056 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:776 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:780 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:784 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:3984 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:784 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3076 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:788 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3080 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3084 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3088 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:792 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:796 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:800 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4000 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:800 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3108 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:804 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) 
-; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3112 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3116 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3120 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:808 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:812 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:816 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4016 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:816 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3140 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:820 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3144 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3148 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3152 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:824 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:828 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:832 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4032 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:832 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3172 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:836 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3176 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3180 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3184 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:840 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:844 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:848 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4048 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:848 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3204 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:852 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3208 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3212 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3216 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:856 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:860 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:864 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4064 +; GFX6-NEXT: buffer_load_dwordx4 
v[0:3], v[5:6], s[4:7], 0 addr64 offset:864 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3236 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:868 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3240 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3244 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3248 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:872 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:876 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:880 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4080 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:880 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3268 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:884 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3272 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3276 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3280 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:888 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:892 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:896 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:3968 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:896 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3332 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:900 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3336 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3340 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3344 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:904 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:908 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:912 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:3984 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:912 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3364 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:916 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3368 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3372 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3376 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:920 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, 
s[40:43], 0 offset:924 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:928 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4000 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:928 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3396 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:932 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3400 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3404 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3408 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:936 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:940 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:944 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4016 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:944 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3428 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:948 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3432 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3436 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3440 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:952 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:956 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:960 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4032 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:960 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3460 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:964 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3464 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3468 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3472 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:968 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:972 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:976 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4048 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:976 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3492 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:980 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3496 ; 4-byte Folded Spill -; GFX6-NEXT: 
buffer_store_dword v9, off, s[44:47], 0 offset:3500 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3504 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:984 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:988 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:992 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4064 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:992 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3524 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:996 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3528 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3532 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3536 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[40:43], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt expcnt(3) -; GFX6-NEXT: v_add_i32_e32 v7, vcc, s0, v5 +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1000 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1004 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1008 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1008 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3556 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1012 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3560 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3564 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3568 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1016 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1020 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1024 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1024 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1028 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:16 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1032 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1036 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1040 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:16 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1040 
; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:20 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1044 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:24 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:28 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:32 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1048 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1052 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1056 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:32 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1056 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:36 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1060 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:40 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:44 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:48 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1064 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1068 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1072 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:48 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1072 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:52 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1076 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:56 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:60 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:64 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1080 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1084 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1088 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:64 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1088 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:68 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1092 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:72 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:76 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:80 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1096 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1100 ; 4-byte Folded Spill +; GFX6-NEXT: 
buffer_store_dword v3, off, s[40:43], 0 offset:1104 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:80 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1104 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:84 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1108 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:88 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:92 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:96 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1112 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1116 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1120 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:96 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1120 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:100 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1124 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:104 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:108 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:112 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1128 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1132 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1136 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:112 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1136 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:116 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1140 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:120 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:124 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:128 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1144 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1148 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1152 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:128 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1152 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:132 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1156 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:136 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:140 ; 4-byte Folded Spill -; GFX6-NEXT: 
buffer_store_dword v3, off, s[44:47], 0 offset:144 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1160 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1164 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1168 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:144
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:1168
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:148 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:1172 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:152 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:156 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:160 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:1176 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:1180 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:1184 ; 4-byte Folded Spill
[hunk continues with roughly 900 more mechanically regenerated "GFX6-NEXT" check lines of this same reload/spill pattern, with one diff line per line as restored above: the buffer_load_dwordx4 reloads step through addr64 offset:160 .. offset:1312 against s[16:19] on the old side and offset:1184 .. offset:2336 against s[4:7] on the new side, and the matching buffer_store_dword folded spills step through offset:164 .. offset:1392 against s[44:47] on the old side and offset:1188 .. offset:2344 against s[40:43] on the new side, with a few non-contiguous old-side store offsets (e.g. 1284, 1316, 1348, 1380) near the end of the hunk.]
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2348 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2352 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1328 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2352 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1412 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2356 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1416 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1420 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1424 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2360 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2364 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2368 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1344 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2368 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1444 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2372 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1448 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1452 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1456 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2376 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2380 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2384 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1360 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2384 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1476 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2388 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1480 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1484 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1488 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2392 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2396 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2400 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1376 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2400 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1508 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2404 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 
offset:1512 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1516 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1520 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2408 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2412 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2416 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1392 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2416 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1524 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2420 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1528 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1532 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1536 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2424 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2428 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2432 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1408 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2432 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1540 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2436 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1544 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1548 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1552 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2440 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2444 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2448 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1424 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2448 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1572 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2452 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1576 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1580 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1584 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2456 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2460 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2464 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1440 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2464 ; 
GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1604 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2468 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1608 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1612 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1616 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2472 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2476 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2480 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1456 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2480 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1636 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2484 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1640 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1644 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1648 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2488 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2492 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2496 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1472 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2496 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1668 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2500 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1672 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1676 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1680 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2504 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2508 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2512 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1488 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2512 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1700 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2516 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1704 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1708 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1712 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2520 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2524 ; 4-byte Folded 
Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2528 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1504 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2528 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1732 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2532 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1736 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1740 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1744 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2536 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2540 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2544 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1520 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2544 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1764 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2548 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1768 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1772 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1776 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2552 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2556 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2560 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1536 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2560 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1796 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2564 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1800 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1804 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1808 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2568 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2572 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2576 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1552 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2576 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1812 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2580 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1816 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 
offset:1820 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1824 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2584 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2588 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2592 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1568 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2592 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1828 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2596 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1832 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1836 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1840 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2600 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2604 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2608 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1584 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2608 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1844 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2612 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1848 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1852 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1856 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2616 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2620 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2624 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1600 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2624 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1876 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2628 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1880 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1884 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1888 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2632 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2636 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2640 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1616 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2640 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 
offset:1908 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2644 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1912 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1916 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1920 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2648 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2652 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2656 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1632 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2656 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1940 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2660 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1944 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1948 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1952 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2664 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2668 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2672 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1648 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2672 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1972 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2676 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1976 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1980 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1984 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2680 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2684 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2688 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1664 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2688 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2004 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2692 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2008 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2012 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2016 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2696 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2700 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2704 ; 4-byte 
Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1680 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2704 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2036 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2708 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2040 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2044 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2048 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2712 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2716 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2720 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1696 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2720 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2068 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2724 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2072 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2076 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2080 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2728 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2732 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2736 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1712 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2736 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2100 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2740 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2104 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2108 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2112 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2744 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2748 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2752 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1728 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2752 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2116 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2756 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2120 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2124 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, 
s[44:47], 0 offset:2128 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2760 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2764 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2768 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1744 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2768 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2132 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2772 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2136 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2140 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2144 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2776 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2780 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2784 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1760 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2784 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2164 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2788 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2168 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2172 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2176 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2792 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2796 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2800 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1776 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2800 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2196 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2804 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2200 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2204 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2208 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2808 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2812 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2816 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1792 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2816 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2228 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, 
s[40:43], 0 offset:2820 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2232 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2236 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2240 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2824 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2828 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2832 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1808 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2832 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2260 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2836 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2264 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2268 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2272 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2840 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2844 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2848 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1824 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2848 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2292 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2852 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2296 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2300 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2304 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2856 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2860 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2864 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1840 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2864 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2324 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2868 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2328 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2332 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2336 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2872 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2876 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2880 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: 
buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1856 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2880 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2356 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2884 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2360 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2364 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2368 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2888 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2892 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2896 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1872 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2896 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2388 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2900 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2392 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2396 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2400 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2904 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2908 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2912 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1888 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2912 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2404 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2916 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2408 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2412 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2416 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2920 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2924 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2928 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1904 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2928 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2420 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2932 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2424 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2428 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2432 ; 4-byte Folded Spill +; GFX6-NEXT: 
buffer_store_dword v1, off, s[40:43], 0 offset:2936 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2940 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2944 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1920 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2944 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2436 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2948 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2440 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2444 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2448 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2952 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2956 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2960 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1936 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2960 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2468 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2964 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2472 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2476 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2480 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2968 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2972 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2976 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1952 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2976 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2500 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2980 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2504 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2508 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2512 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:2984 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:2988 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:2992 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1968 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:2992 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2532 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:2996 ; 4-byte Folded Spill ; GFX6-NEXT: 
s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2536 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2540 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2544 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3000 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3004 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3008 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1984 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3008 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2564 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3012 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2568 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2572 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2576 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3016 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3020 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3024 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2000 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3024 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2596 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3028 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2600 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2604 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2608 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3032 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3036 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3040 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2016 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3040 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2628 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3044 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2632 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2636 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2640 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3048 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3052 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3056 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2032 +; 
GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3056 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2660 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3060 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2664 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2668 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2672 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3064 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3068 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3072 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2048 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3072 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2692 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3076 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2696 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2700 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2704 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3080 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3084 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3088 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2064 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3088 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2708 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3092 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2712 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2716 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2720 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3096 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3100 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3104 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2080 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3104 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2724 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3108 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2728 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2732 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2736 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3112 ; 4-byte Folded Spill +; 
GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3116 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3120 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2096 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3120 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2756 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3124 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2760 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2764 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2768 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3128 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3132 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3136 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2112 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3136 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2788 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3140 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2792 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2796 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2800 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3144 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3148 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3152 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2128 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3152 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2820 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3156 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2824 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2828 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2832 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3160 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3164 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3168 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2144 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3168 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2852 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3172 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2856 
; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2860 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2864 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3176 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3180 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3184 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2160
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3184
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2884 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3188 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2888 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2892 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2896 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3192 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3196 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3200 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2176
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3200
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2916 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3204 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2920 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2924 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2928 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3208 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3212 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3216 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2192
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3216
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2948 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3220 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2952 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2956 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2960 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3224 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3228 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3232 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2208
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3232
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2980 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3236 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2984 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2988 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2992 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3240 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3244 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3248 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2224
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3248
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2996 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3252 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3000 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3004 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3008 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3256 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3260 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3264 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2240
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3264
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3012 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3268 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3016 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3020 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3024 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3272 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3276 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3280 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2256
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3280
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3028 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3284 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3032 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3036 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3040 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3288 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3292 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3296 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2272
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3296
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3060 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3300 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3064 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3068 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3072 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3304 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3308 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3312 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2288
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3312
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3092 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3316 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3096 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3100 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3104 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3320 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3324 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3328 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2304
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3328
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3124 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3332 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3128 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3132 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3136 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3336 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3340 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3344 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2320
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3344
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3156 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3348 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3160 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3164 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3168 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3352 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3356 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3360 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2336
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3360
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3188 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3364 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3192 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3196 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3200 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3368 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3372 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3376 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2352
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3376
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3220 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3380 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3224 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3228 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3232 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3384 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3388 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3392 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2368
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3392
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3252 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3396 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3256 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3260 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3264 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3400 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3404 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3408 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2384
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3408
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3284 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3412 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3288 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3292 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3296 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3416 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3420 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3424 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2400
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3424
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3300 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3428 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3304 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3308 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3312 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3432 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3436 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3440 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2416
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3440
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3316 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3444 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3320 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3324 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3328 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3448 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3452 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3456 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2432
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3456
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3348 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3460 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3352 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3356 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3360 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3464 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3468 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3472 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2448
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3472
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3380 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3476 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3384 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3388 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3392 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3480 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3484 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3488 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2464
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3488
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3412 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3492 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3416 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3420 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3424 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3496 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3500 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3504 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2480
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3504
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3444 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3508 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3448 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3452 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3456 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3512 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3516 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3520 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2496
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3520
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3476 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3524 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3480 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3484 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3488 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3528 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3532 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3536 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2512
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3536
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3508 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3540 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3512 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3516 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3520 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3544 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3548 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3552 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2528
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3552
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3540 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3556 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3544 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3548 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3552 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3560 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3564 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3568 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2544
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3568
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3572 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3572 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3576 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3580 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3584 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3576 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3580 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3584 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2560
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3584
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3588 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3588 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3592 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3596 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3600 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3592 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3596 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3600 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2576
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3600
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3604 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3604 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3608 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3612 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3616 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3608 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3612 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3616 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2592
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3616
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3620 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3620 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3624 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3628 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3632 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3624 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3628 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3632 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2608
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3632
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3636 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3636 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3640 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3644 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3648 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3640 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3644 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3648 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2624
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3648
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3652 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3652 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3656 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3660 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3664 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3656 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3660 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3664 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2640
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3664
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3668 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3668 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3672 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3676 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3680 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3672 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3676 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3680 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2656
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3680
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3684 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3684 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3688 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3692 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3696 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3688 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3692 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3696 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2672
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3696
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3700 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3700 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3704 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3708 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3712 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3704 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3708 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3712 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2688
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3712
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3716 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3716 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3720 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3724 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3728 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3720 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3724 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3728 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2704
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3728
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3732 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3732 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3736 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3740 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3744 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3736 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3740 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3744 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2720
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3744
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3748 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3748 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3752 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3756 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3760 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3752 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3756 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3760 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2736
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3760
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3764 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3764 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3768 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3772 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3776 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3768 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3772 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3776 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2752
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3776
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3780 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3780 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3784 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3788 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3792 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3784 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3788 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3792 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2768
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3792
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3796 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3796 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3800 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3804 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3808 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3800 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3804 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3808 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2784
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3808
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3812 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3812 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3816 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3820 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3824 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3816 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3820 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3824 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2800
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3824
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3828 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3828 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3832 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3836 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3840 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3832 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3836 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3840 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2816
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3840
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3844 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3844 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3848 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3852 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3856 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3848 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3852 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3856 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2832
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3856
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3860 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3860 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3864 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3868 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3872 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3864 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3868 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3872 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2848
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3872
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3876 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3876 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3880 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3884 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3888 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3880 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3884 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3888 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2864
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3888
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3892 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3892 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3896 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3900 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3904 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3896 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3900 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3904 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2880
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3904
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3908 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3908 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3912 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3916 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3920 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3912 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3916 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3920 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2896
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3920
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3924 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3924 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3928 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3932 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3936 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3928 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3932 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3936 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2912
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3936
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3940 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3940 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3944 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3948 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3952 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3944 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3948 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3952 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2928
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3952
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3956 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3956 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3960 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3964 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3968 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3960 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3964 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3968 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2944
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3968
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3972 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3972 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3976 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3980 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3984 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3976 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3980 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:3984 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2960
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:3984
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3988 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:3988 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3992 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3996 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4000 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:3992 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:3996 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:4000 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2976
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:4000
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4004 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:4004 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4008 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4012 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4016 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:4008 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:4012 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:4016 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2992
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:4016
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4020 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:4020 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4024 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4028 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4032 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:4024 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:4028 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:4032 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3008
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:4032
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4036 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:4036 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4040 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4044 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4048 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:4040 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:4044 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:4048 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3024
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:4048
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4052 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:4052 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4056 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4060 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4064 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:4056 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:4060 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:4064 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3040
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:4064
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4068 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], 0 offset:4068 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4072 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4076 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4080 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], 0 offset:4072 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], 0 offset:4076 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], 0 offset:4080 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3056
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:4080
+; GFX6-NEXT: s_mov_b64 s[4:5], 0x80
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3072
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:3968
 ; GFX6-NEXT: s_mov_b32 s2, 0x40100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3088
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:3984
 ; GFX6-NEXT: s_mov_b32 s2, 0x40500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3104
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4000
 ; GFX6-NEXT: s_mov_b32 s2, 0x40900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3120
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4016
 ; GFX6-NEXT: s_mov_b32 s2, 0x40d00
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3136
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4032
 ; GFX6-NEXT: s_mov_b32 s2, 0x41100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3152
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4048
 ; GFX6-NEXT: s_mov_b32 s2, 0x41500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3168
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4064
 ; GFX6-NEXT: s_mov_b32 s2, 0x41900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3184
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4080
 ; GFX6-NEXT: s_mov_b32 s2, 0x41d00
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3200
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:3968
 ; GFX6-NEXT: s_mov_b32 s2, 0x42100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3216
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:3984
 ; GFX6-NEXT: s_mov_b32 s2, 0x42500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3232
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4000
 ; GFX6-NEXT: s_mov_b32 s2, 0x42900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3248
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4016
 ; GFX6-NEXT: s_mov_b32 s2, 0x42d00
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3264
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4032
 ; GFX6-NEXT: s_mov_b32 s2, 0x43100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3280
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4048
 ; GFX6-NEXT: s_mov_b32 s2, 0x43500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3296
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4064
 ; GFX6-NEXT: s_mov_b32 s2, 0x43900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3312
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4080
 ; GFX6-NEXT: s_mov_b32 s2, 0x43d00
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3328
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:3968
 ; GFX6-NEXT: s_mov_b32 s2, 0x44100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3344
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:3984
 ; GFX6-NEXT: s_mov_b32 s2, 0x44500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3360
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4000
 ; GFX6-NEXT: s_mov_b32 s2, 0x44900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3376
+; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4016
 ; GFX6-NEXT: s_mov_b32 s2, 0x44d00
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill
-; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill
+; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3392 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4032 ; GFX6-NEXT: s_mov_b32 s2, 0x45100 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3408 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4048 ; GFX6-NEXT: s_mov_b32 s2, 0x45500 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3424 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4064 ; GFX6-NEXT: s_mov_b32 s2, 0x45900 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3440 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4080 ; GFX6-NEXT: s_mov_b32 s2, 0x45d00 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 
4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3456 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:3968 ; GFX6-NEXT: s_mov_b32 s2, 0x46100 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3472 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:3984 ; GFX6-NEXT: s_mov_b32 s2, 0x46500 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3488 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4000 ; GFX6-NEXT: s_mov_b32 s2, 0x46900 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3504 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4016 ; GFX6-NEXT: s_mov_b32 s2, 0x46d00 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, 
off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3520 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4032 ; GFX6-NEXT: s_mov_b32 s2, 0x47100 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3536 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4048 ; GFX6-NEXT: s_mov_b32 s2, 0x47500 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3552 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4064 ; GFX6-NEXT: s_mov_b32 s2, 0x47900 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 
v[0:3], v[5:6], s[16:19], 0 addr64 offset:3568 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4080 ; GFX6-NEXT: s_mov_b32 s2, 0x47d00 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3584 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:3968 ; GFX6-NEXT: s_mov_b32 s2, 0x48100 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3600 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:3984 ; GFX6-NEXT: s_mov_b32 s2, 0x48500 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3616 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4000 ; GFX6-NEXT: s_mov_b32 s2, 0x48900 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: 
buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3632 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4016 ; GFX6-NEXT: s_mov_b32 s2, 0x48d00 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3648 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4032 ; GFX6-NEXT: s_mov_b32 s2, 0x49100 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3664 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4048 ; GFX6-NEXT: s_mov_b32 s2, 0x49500 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3680 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4064 ; GFX6-NEXT: s_mov_b32 s2, 0x49900 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded 
Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3696 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4080 ; GFX6-NEXT: s_mov_b32 s2, 0x49d00 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3712 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:3968 ; GFX6-NEXT: s_mov_b32 s2, 0x4a100 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3728 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:3984 ; GFX6-NEXT: s_mov_b32 s2, 0x4a500 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 
addr64 offset:3744 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4000 ; GFX6-NEXT: s_mov_b32 s2, 0x4a900 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3760 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4016 ; GFX6-NEXT: s_mov_b32 s2, 0x4ad00 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3776 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4032 ; GFX6-NEXT: s_mov_b32 s2, 0x4b100 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3792 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4048 ; GFX6-NEXT: s_mov_b32 s2, 0x4b500 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, 
s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3808 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4064 ; GFX6-NEXT: s_mov_b32 s2, 0x4b900 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3824 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4080 ; GFX6-NEXT: s_mov_b32 s2, 0x4bd00 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3840 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:3968 ; GFX6-NEXT: s_mov_b32 s2, 0x4c100 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3856 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:3984 ; GFX6-NEXT: s_mov_b32 s2, 0x4c500 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt 
vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3872 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4000 ; GFX6-NEXT: s_mov_b32 s2, 0x4c900 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3888 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4016 ; GFX6-NEXT: s_mov_b32 s2, 0x4cd00 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3904 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4032 ; GFX6-NEXT: s_mov_b32 s2, 0x4d100 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3920 +; GFX6-NEXT: 
buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4048 ; GFX6-NEXT: s_mov_b32 s2, 0x4d500 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3936 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4064 ; GFX6-NEXT: s_mov_b32 s2, 0x4d900 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3952 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4080 ; GFX6-NEXT: s_mov_b32 s2, 0x4dd00 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3968 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:3968 ; GFX6-NEXT: s_mov_b32 s2, 0x4e100 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte 
Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3984 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:3984 ; GFX6-NEXT: s_mov_b32 s2, 0x4e500 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4000 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4000 ; GFX6-NEXT: s_mov_b32 s2, 0x4e900 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4016 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4016 ; GFX6-NEXT: s_mov_b32 s2, 0x4ed00 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4032 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4032 ; GFX6-NEXT: s_mov_b32 s2, 0x4f100 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: 
buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4048 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4048 ; GFX6-NEXT: s_mov_b32 s2, 0x4f500 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4064 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4064 ; GFX6-NEXT: s_mov_b32 s2, 0x4f900 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4080 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4080 ; GFX6-NEXT: ;;#ASMSTART ; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: v_add_i32_e32 v7, vcc, s0, v5 +; GFX6-NEXT: v_mov_b32_e32 v4, s1 +; GFX6-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc ; GFX6-NEXT: ;;#ASMSTART ; GFX6-NEXT: ;;#ASMEND ; GFX6-NEXT: ;;#ASMSTART @@ -2690,2304 +2684,2303 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX6-NEXT: ;;#ASMEND ; GFX6-NEXT: ;;#ASMSTART ; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3556 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3560 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3564 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3568 ; 4-byte Folded Reload -; GFX6-NEXT: v_mov_b32_e32 v4, s1 -; GFX6-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc -; GFX6-NEXT: s_mov_b64 s[2:3], s[18:19] +; GFX6-NEXT: 
s_mov_b64 s[2:3], s[6:7] +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s33, 0x4f500 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4080 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4064 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3524 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3528 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3532 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3536 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s33, 0x4f100 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4064 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4048 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3492 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3496 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3500 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3504 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s33, 0x4ed00 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4048 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4032 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3460 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3464 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3468 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3472 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s33, 0x4e900 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4032 +; 
GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4016
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3428 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3432 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3436 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3440 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s33, 0x4e500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4016
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:4000
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3396 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3400 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3404 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3408 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s33, 0x4e100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4000
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:3984
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3364 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3368 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3372 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3376 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s33, 0x4dd00
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:3984
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:3968
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3332 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3336 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3340 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3344 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s33, 0x4d900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:3968
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4080
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3268 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3272 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3276 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3280 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s33, 0x4d500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4080
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4064
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3236 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3240 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3244 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3248 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s33, 0x4d100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4064
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4048
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3204 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3208 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3212 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3216 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s33, 0x4cd00
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4048
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4032
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3172 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3176 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3180 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3184 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s33, 0x4c900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4032
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4016
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3140 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3144 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3148 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3152 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s33, 0x4c500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4016
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:4000
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3108 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3112 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3116 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3120 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s33, 0x4c100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4000
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:3984
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3076 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3080 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3084 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3088 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s33 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:3984
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[28:31], 0 addr64 offset:3968
+; GFX6-NEXT: s_mov_b32 s28, 0x4bd00
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3044 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3048 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3052 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3056 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s28, 0x4b900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:3968
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4080
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2964 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2968 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2972 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2976 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s28, 0x4b500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4080
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4064
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2932 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2936 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2940 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2944 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s28, 0x4b100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4064
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4048
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2900 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2904 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2908 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2912 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s28, 0x4ad00
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4048
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4032
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2868 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2872 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2876 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2880 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s28, 0x4a900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4032
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4016
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2836 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2840 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2844 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2848 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s28, 0x4a500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4016
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:4000
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2804 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2808 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2812 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2816 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s28, 0x4a100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4000
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:3984
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2772 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2776 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2780 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2784 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s28 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s28 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s28 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s28 offset:12 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:3984
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[24:27], 0 addr64 offset:3968
+; GFX6-NEXT: s_mov_b32 s24, 0x49d00
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2740 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2744 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2748 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2752 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s24, 0x49900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:3968
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4080
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2676 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2680 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2684 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2688 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s24, 0x49500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4080
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4064
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2644 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2648 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2652 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2656 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s24, 0x49100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4064
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4048
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2612 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2616 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2620 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2624 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s24, 0x48d00
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4048
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4032
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2580 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2584 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2588 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2592 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s24, 0x48900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4032
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4016
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2548 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2552 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2556 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2560 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s24, 0x48500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4016
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:4000
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2516 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2520 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2524 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2528 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s24, 0x48100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4000
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:3984
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2484 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2488 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2492 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2496 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s24 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:3984
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[20:23], 0 addr64 offset:3968
+; GFX6-NEXT: s_mov_b32 s20, 0x47d00
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2452 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2456 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2460 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2464 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s20, 0x47900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:3968
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4080
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2372 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2376 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2380 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2384 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s20, 0x47500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4080
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4064
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2340 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2344 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2348 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2352 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s20, 0x47100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4064
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4048
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2308 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2312 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2316 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2320 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s20, 0x46d00
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4048
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4032
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2276 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2280 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2284 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2288 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s20, 0x46900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4032
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4016
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2244 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2248 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2252 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2256 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s20, 0x46500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4016
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:4000
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2212 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2216 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2220 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2224 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s20, 0x46100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4000
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:3984
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2180 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2184 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2188 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2192 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s20 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s20 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s20 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s20 offset:12 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:3984
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[16:19], 0 addr64 offset:3968
+; GFX6-NEXT: s_mov_b32 s16, 0x45d00
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2148 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2152 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2156 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2160 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s16, 0x45900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:3968
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4080
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2084 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2088 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2092 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2096 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s16, 0x45500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4080
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4064
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2052 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2056 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2060 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2064 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s16, 0x45100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4064
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4048
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2020 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2024 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2028 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2032 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s16, 0x44d00
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4048
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4032
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1988 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1992 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1996 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2000 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s16, 0x44900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4032
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4016
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1956 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1960 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1964 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1968 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s16, 0x44500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4016
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:4000
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1924 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1928 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1932 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1936 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s16, 0x44100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4000
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:3984
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1892 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1896 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1900 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1904 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s16 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s16 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s16 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s16 offset:12 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:3984
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[12:15], 0 addr64 offset:3968
+; GFX6-NEXT: s_mov_b32 s12, 0x43d00
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1860 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1864 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1868 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1872 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s12, 0x43900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:3968
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4080
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1780 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1784 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1788 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1792 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s12, 0x43500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4080
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4064
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1748 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1752 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1756 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1760 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s12, 0x43100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4064
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4048
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1716 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1720 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1724 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1728 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s12, 0x42d00
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4048
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4032
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1684 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1688 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1692 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1696 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s12, 0x42900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4032
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4016
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1652 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1656 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1660 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1664 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s12, 0x42500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4016
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:4000
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1620 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1624 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1628 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1632 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s12, 0x42100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4000
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:3984
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1588 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1592 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1596 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1600 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s12 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:3984
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[8:11], 0 addr64 offset:3968
+; GFX6-NEXT: s_mov_b32 s8, 0x41d00
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1556 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1560 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1564 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1568 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s8, 0x41900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:3968
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4080
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1492 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1496 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1500 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1504 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s8, 0x41500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4080
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4064
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1460 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1464 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1468 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1472 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s8, 0x41100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4064
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4048
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1428 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1432 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1436 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1440 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s8, 0x40d00
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4048
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4032
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1396 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1400 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1404 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1408 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s8, 0x40900
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4032
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4016
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1364 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1368 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1372 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1376 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s8, 0x40500
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4016
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:4000
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1332 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1336 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1340 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1344 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: s_mov_b32 s8, 0x40100
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4000
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:3984
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1300 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1304 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1308 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1312 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:3984
+; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:3968
+; GFX6-NEXT: s_mov_b32 s4, 0x3fd00
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1268 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1272 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1276 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1280 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
-; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:3968
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4080
-; GFX6-NEXT: s_mov_b32 s4, 0x4f900
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4f500
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:4068 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:4072 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:4076 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:4080 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4064
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4f100
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:4052 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:4056 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:4060 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:4064 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4048
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4ed00
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:4036 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:4040 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:4044 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:4048 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4032
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4e900
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:4020 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:4024 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:4028 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:4032 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4016
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4e500
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:4004 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:4008 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:4012 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:4016 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4000
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4e100
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3988 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3992 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3996 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:4000 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3984
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4dd00
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3972 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3976 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3980 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3984 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3968
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4d900
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3956 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3960 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3964 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3968 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3952
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4d500
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3940 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3944 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3948 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3952 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3936
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4d100
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3924 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3928 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3932 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3936 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3920
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4cd00
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3908 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3912 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3916 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3920 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3904
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4c900
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3892 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3896 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3900 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3904 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3888
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4c500
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3876 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3880 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3884 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3888 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3872
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4c100
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3860 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3864 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3868 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3872 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3856
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4bd00
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3844 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3848 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3852 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3856 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3840
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4b900
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3828 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3832 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3836 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3840 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3824
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4b500
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3812 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3816 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3820 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3824 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3808
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4b100
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3796 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3800 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3804 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3808 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3792
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s4, 0x4ad00
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3780 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3784 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3788 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3792 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3776
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte
Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4a900 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3764 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3768 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3772 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3776 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3760 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4a500 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3748 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3752 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3756 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3760 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3744 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4a100 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3732 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3736 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3740 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3744 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3728 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x49d00 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3716 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3720 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3724 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3728 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3712 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x49900 +; GFX6-NEXT: buffer_load_dword 
v0, off, s[40:43], 0 offset:3700 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3704 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3708 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3712 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3696 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x49500 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3684 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3688 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3692 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3696 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3680 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x49100 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3668 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3672 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3676 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3680 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3664 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x48d00 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3652 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3656 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3660 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3664 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3648 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x48900 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3636 ; 4-byte Folded Reload +; GFX6-NEXT: 
buffer_load_dword v1, off, s[40:43], 0 offset:3640 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3644 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3648 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3632 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x48500 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3620 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3624 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3628 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3632 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3616 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x48100 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3604 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3608 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3612 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3616 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3600 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x47d00 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3588 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3592 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3596 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3600 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3584 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x47900 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3572 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3576 ; 4-byte Folded Reload +; 
GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3580 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3584 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3568 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x47500 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3556 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3560 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3564 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3568 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3552 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x47100 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3540 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3544 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3548 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3552 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3536 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x46d00 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3524 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3528 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3532 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3536 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3520 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x46900 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3508 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3512 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3516 ; 4-byte Folded 
Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3520 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3504 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x46500 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3492 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3496 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3500 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3504 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3488 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x46100 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3476 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3480 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3484 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3488 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3472 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x45d00 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3460 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3464 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3468 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3472 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3456 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x45900 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3444 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3448 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3452 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3456 ; 4-byte 
Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3440 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x45500 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3428 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3432 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3436 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3440 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3424 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x45100 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3412 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3416 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3420 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3424 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3408 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x44d00 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3396 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3400 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3404 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3408 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3392 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x44900 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3380 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3384 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3388 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3392 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 
v[0:3], v[5:6], s[0:3], 0 addr64 offset:3376 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x44500 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3364 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3368 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3372 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3376 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3360 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x44100 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3348 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3352 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3356 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3360 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3344 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x43d00 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3332 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3336 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3340 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3344 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3328 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x43900 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3316 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3320 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3324 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3328 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3312 ; GFX6-NEXT: s_waitcnt expcnt(0) -; 
GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x43500 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3300 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3304 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3308 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3312 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3296 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x43100 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3284 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3288 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3292 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3296 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3280 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x42d00 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3268 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3272 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3276 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3280 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3264 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x42900 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3252 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3256 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3260 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3264 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3248 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; 
GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x42500 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3236 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3240 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3244 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3248 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3232 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x42100 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3220 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3224 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3228 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3232 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3216 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x41d00 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3204 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3208 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3212 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3216 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3200 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x41900 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3188 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3192 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3196 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3200 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3184 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload 
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x41500 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3172 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3176 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3180 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3184 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3168 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x41100 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3156 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3160 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3164 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3168 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3152 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x40d00 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3140 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3144 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3148 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3152 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3136 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x40900 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3124 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3128 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3132 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3136 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3120 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded 
Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x40500 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3108 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3112 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3116 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3120 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3104 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x40100 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3092 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3096 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3100 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3104 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3088 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x3fd00 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3076 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3080 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3084 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3088 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3072 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3060 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3064 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3068 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3072 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3056 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4068 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4072 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4076 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4080 ; 4-byte Folded Reload +; 
GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3044 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3048 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3052 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3056 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3040 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4052 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4056 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4060 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4064 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3028 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3032 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3036 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3040 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3024 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4036 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4040 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4044 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4048 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:3012 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3016 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3020 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3024 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3008 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4020 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4024 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4028 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4032 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2996 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:3000 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:3004 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:3008 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2992 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4004 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4008 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4012 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4016 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2980 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2984 ; 4-byte 
Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2988 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2992 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2976 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3988 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3992 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3996 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4000 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2964 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2968 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2972 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2976 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2960 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3972 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3976 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3980 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3984 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2948 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2952 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2956 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2960 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2944 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3956 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3960 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3964 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3968 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2932 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2936 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2940 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2944 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2928 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3940 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3944 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3948 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3952 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2916 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2920 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2924 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 
offset:2928 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2912
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3924 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3928 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3932 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3936 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2900 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2904 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2908 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2912 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2896
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3908 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3912 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3916 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3920 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2884 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2888 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2892 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2896 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2880
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3892 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3896 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3900 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3904 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2868 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2872 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2876 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2880 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2864
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3876 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3880 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3884 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3888 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2852 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2856 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2860 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2864 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2848
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3860 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3864 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3868 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3872 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2836 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2840 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2844 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2848 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2832
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3844 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3848 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3852 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3856 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2820 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2824 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2828 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2832 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2816
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3828 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3832 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3836 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3840 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2804 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2808 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2812 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2816 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2800
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3812 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3816 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3820 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3824 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2788 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2792 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2796 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2800 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2784
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3796 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3800 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3804 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3808 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2772 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2776 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2780 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2784 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2768
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3780 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3784 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3788 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3792 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2756 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2760 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2764 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2768 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2752
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3764 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3768 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3772 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3776 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2740 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2744 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2748 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2752 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2736
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3748 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3752 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3756 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3760 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2724 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2728 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2732 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2736 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2720
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3732 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3736 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3740 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3744 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2708 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2712 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2716 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2720 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2704
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3716 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3720 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3724 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3728 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2692 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2696 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2700 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2704 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2688
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3700 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3704 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3708 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3712 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2676 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2680 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2684 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2688 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2672
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3684 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3688 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3692 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3696 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2660 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2664 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2668 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2672 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2656
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3668 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3672 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3676 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3680 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2644 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2648 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2652 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2656 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2640
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3652 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3656 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3660 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3664 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2628 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2632 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2636 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2640 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2624
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3636 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3640 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3644 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3648 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2612 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2616 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2620 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2624 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2608
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3620 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3624 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3628 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3632 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2596 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2600 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2604 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2608 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2592
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3604 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3608 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3612 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3616 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2580 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2584 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2588 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2592 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2576
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3588 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3592 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3596 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3600 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2564 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2568 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2572 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2576 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2560
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3572 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3576 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3580 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3584 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2548 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2552 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2556 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2560 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2544
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3540 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3544 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3548 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3552 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2532 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2536 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2540 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2544 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2528
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3508 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3512 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3516 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3520 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2516 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2520 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2524 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2528 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2512
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3476 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3480 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3484 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3488 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2500 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2504 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2508 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2512 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2496
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3444 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3448 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3452 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3456 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2484 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2488 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2492 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2496 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2480
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3412 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3416 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3420 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3424 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2468 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2472 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2476 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2480 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2464
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3380 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3384 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3388 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3392 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2452 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2456 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2460 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2464 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2448
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3348 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3352 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3356 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3360 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2436 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2440 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2444 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2448 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2432
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3316 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3320 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3324 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3328 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2420 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2424 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2428 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2432 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2416
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3300 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3304 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3308 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3312 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2404 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2408 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2412 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2416 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2400
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3284 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3288 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3292 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3296 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2388 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2392 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2396 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2400 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2384
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3252 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3256 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3260 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3264 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2372 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2376 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2380 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2384 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2368
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3220 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3224 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3228 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3232 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2356 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2360 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2364 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2368 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2352
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3188 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3192 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3196 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3200 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2340 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2344 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2348 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2352 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2336
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3156 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3160 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3164 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3168 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2324 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2328 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2332 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2336 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2320
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3124 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3128 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3132 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3136 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2308 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2312 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2316 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2320 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2304
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3092 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3096 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3100 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3104 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2292 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2296 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2300 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2304 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2288
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3060 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3064 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3068 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3072 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2276 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2280 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2284 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2288 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2272
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3028 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3032 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3036 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3040 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2260 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2264 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2268 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2272 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2256
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3012 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3016 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3020 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3024 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2244 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2248 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2252 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2256 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2240
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2996 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3000 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3004 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3008 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2228 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2232 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2236 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2240 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2224
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2980 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2984 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2988 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2992 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2212 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2216 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2220 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2224 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2208
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2948 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2952 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2956 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2960 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2196 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2200 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2204 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2208 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2192
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2916 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2920 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2924 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2928 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2180 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2184 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2188 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2192 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2176
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2884 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2888 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2892 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2896 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2164 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2168 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2172 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2176 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2160
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2852 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2856 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2860 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2864 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2148 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2152 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2156 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2160 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2144
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2820 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2824 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2828 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2832 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2132 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2136 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2140 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2144 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2128
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2788 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2792 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2796 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2800 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2116 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2120 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2124 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2128 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2112
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2756 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2760 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2764 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2768 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2100 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2104 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2108 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2112 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2096
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2724 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2728 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2732 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2736 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2084 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2088 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2092 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2096 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2080
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2708 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2712 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2716 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2720 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2068 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2072 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2076 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2080 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2064
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2692 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2696 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2700 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2704 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2052 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2056 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2060 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2064 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2048
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2660 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2664 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2668 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2672 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2036 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2040 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2044 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2048 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2032
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2628 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2632 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2636 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2640 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2020 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2024 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2028 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2032 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2016
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2596 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2600 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2604 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2608 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:2004 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:2008 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:2012 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2016 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2000
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2564 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2568 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2572 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2576 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1988 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1992 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1996 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:2000 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1984
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2532 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2536 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2540 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2544 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1972 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1976 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1980 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1984 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1968
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2500 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2504 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2508 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2512 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1956 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1960 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1964 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1968 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1952
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2468 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2472 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2476 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2480 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1940 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1944 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1948 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1952 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1936
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2436 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2440 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2444 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2448 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1924 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1928 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1932 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1936 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1920
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2420 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2424 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2428 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2432 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1908 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1912 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1916 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1920 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1904
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2404 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2408 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2412 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2416 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1892 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1896 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1900 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1904 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1888
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2388 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2392 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2396 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2400 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1876 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1880 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1884 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1888 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1872
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2356 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2360 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2364 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2368 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1860 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1864 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1868 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1872 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1856
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2324 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2328 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2332 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2336 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1844 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1848 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1852 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1856 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1840
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2292 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2296 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2300 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2304 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1828 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1832 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1836 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1840 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1824
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2260 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2264 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2268 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2272 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1812 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1816 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1820 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1824 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1808
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2228 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2232 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2236 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2240 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1796 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1800 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1804 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1808 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1792
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2196 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2200 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2204 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2208 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1780 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1784 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1788 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1792 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1776
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2164 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2168 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2172 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2176 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1764 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1768 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1772 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1776 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1760
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2132 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2136 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2140 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2144 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1748 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1752 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1756 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1760 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1744
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2116 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2120 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2124 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2128 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1732 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1736 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1740 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1744 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1728
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2100 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2104 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2108 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2112 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1716 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1720 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1724 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1728 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1712
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2068 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2072 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2076 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2080 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1700 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1704 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1708 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1712 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1696
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2036 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2040 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2044 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2048 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1684 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1688 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1692 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1696 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1680
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2004 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2008 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2012 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2016 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1668 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1672 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1676 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1680 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1664
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1972 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1976 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1980 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1984 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1652 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1656 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1660 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1664 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1648
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1940 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1944 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1948 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1952 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1636 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1640 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1644 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1648 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1632
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1908 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1912 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1916 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1920 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1620 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1624 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1628 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1632 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1616
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1876 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1880 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1884 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1888 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1604 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1608 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1612 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1616 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1600
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1844 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1848 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1852 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1856 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1588 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1592 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1596 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1600 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1584
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1828 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1832 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1836 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1840 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1572 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1576 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1580 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1584 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1568
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1812 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1816 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1820 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1824 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1556 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1560 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1564 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1568 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1552
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1796 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1800 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1804 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1808 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1540 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1544 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1548 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1552 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1536
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1764 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1768 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1772 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1776 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1524 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1528 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1532 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1536 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1520
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1732 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1736 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1740 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1744 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1508 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1512 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1516 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1520 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1504 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1700 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1704 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1708 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1712 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1492 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1496 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1500 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1504 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1488 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1668 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1672 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1676 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1680 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1476 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1480 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1484 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1488 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1472 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1636 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1640 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1644 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1648 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1460 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1464 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1468 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1472 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1456 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1604 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1608 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1612 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1616 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1444 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1448 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1452 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1456 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], 
v[5:6], s[0:3], 0 addr64 offset:1440 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1572 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1576 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1580 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1584 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1428 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1432 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1436 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1440 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1424 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1540 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1544 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1548 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1552 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1412 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1416 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1420 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1424 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1408 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1524 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1528 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1532 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1536 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1396 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1400 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1404 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1408 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1392 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1508 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1512 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1516 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1520 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1380 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1384 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1388 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1392 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1376 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1476 ; 4-byte Folded Reload -; 
GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1480 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1484 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1488 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1364 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1368 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1372 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1376 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1360 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1444 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1448 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1452 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1456 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1348 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1352 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1356 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1360 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1344 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1412 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1416 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1420 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1424 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1332 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1336 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1340 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1344 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1328 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1380 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1384 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1388 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1392 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1316 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1320 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1324 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1328 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1312 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1348 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1352 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1356 ; 4-byte 
Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1360 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1300 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1304 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1308 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1312 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1296 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1316 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1320 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1324 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1328 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1284 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1288 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1292 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1296 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1280 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1284 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1288 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1292 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1296 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1268 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1272 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1276 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1280 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1264 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1252 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1256 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1260 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1264 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1252 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1256 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1260 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1264 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1248 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1236 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1240 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1244 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1248 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 
offset:1236 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1240 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1244 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1248 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1232 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1220 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1224 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1228 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1232 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1220 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1224 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1228 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1232 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1216 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1204 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1208 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1212 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1216 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1204 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1208 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1212 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1216 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1200 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1188 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1192 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1196 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1200 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1188 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1192 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1196 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1200 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1184 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1172 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1176 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1180 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1184 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1172 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1176 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, 
off, s[40:43], 0 offset:1180 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1184 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1168 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1156 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1160 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1164 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1168 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1156 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1160 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1164 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1168 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1152 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1140 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1144 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1148 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1152 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1140 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1144 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1148 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1152 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1136 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1124 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1128 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1132 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1136 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1124 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1128 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1132 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1136 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1120 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1108 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1112 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1116 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1120 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1108 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1112 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1116 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1120 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt 
vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1104 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1092 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1096 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1100 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1104 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1092 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1096 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1100 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1104 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1088 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1076 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1080 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1084 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1088 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1076 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1080 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1084 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1088 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1072 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1060 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1064 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1068 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1072 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1060 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1064 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1068 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1072 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1056 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1044 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1048 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1052 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1056 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1044 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1048 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1052 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1056 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1040 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, 
s[44:47], 0 offset:1028 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1032 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1036 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1040 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1028 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1032 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1036 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1040 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1024 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1012 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1016 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1020 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1024 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:1012 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1016 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1020 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1024 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1008 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:996 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1000 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1004 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1008 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:996 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:1000 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:1004 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:1008 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:992 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:980 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:984 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:988 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:992 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:980 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:984 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:988 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:992 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:976 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:964 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:968 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, 
s[44:47], 0 offset:972 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:976 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:964 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:968 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:972 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:976 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:960 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:948 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:952 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:956 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:960 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:948 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:952 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:956 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:960 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:944 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:932 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:936 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:940 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:944 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:932 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:936 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:940 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:944 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:928 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:916 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:920 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:924 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:928 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:916 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:920 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:924 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:928 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:912 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:900 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:904 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:908 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:912 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:900 ; 
4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:904 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:908 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:912 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:896 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:884 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:888 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:892 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:896 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:884 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:888 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:892 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:896 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:880 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:868 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:872 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:876 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:880 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:868 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:872 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:876 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:880 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:864 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:852 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:856 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:860 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:864 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:852 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:856 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:860 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:864 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:848 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:836 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:840 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:844 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:848 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:836 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:840 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:844 ; 4-byte Folded Reload +; 
GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:848 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:832 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:820 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:824 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:828 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:832 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:820 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:824 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:828 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:832 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:816 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:804 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:808 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:812 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:816 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:804 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:808 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:812 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:816 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:800 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:788 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:792 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:796 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:800 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:788 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:792 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:796 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:800 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:784 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:772 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:776 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:780 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:784 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:772 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:776 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:780 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:784 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:768 ; 
GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:756 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:760 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:764 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:768 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:756 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:760 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:764 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:768 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:752 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:740 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:744 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:748 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:752 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:740 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:744 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:748 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:752 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:736 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:724 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:728 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:732 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:736 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:724 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:728 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:732 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:736 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:720 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:708 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:712 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:716 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:720 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:708 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:712 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:716 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:720 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:704 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:692 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:696 ; 4-byte Folded 
Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:700 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:704 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:692 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:696 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:700 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:704 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:688 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:676 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:680 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:684 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:688 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:676 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:680 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:684 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:688 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:672 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:660 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:664 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:668 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:672 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:660 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:664 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:668 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:672 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:656 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:644 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:648 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:652 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:656 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:644 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:648 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:652 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:656 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:640 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:628 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:632 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:636 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:640 ; 4-byte Folded Reload +; GFX6-NEXT: 
buffer_load_dword v0, off, s[40:43], 0 offset:628 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:632 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:636 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:640 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:624
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:612 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:616 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:620 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:624 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:612 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:616 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:620 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:624 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:608
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:596 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:600 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:604 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:608 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:596 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:600 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:604 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:608 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:592
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:580 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:584 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:588 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:592 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:580 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:584 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:588 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:592 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:576
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:564 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:568 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:572 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:576 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:564 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:568 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:572 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:576 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:560
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:548 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:552 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:556 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:560 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:548 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:552 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:556 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:560 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:544
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:532 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:536 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:540 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:544 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:532 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:536 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:540 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:544 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:528
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:516 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:520 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:524 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:528 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:516 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:520 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:524 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:528 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:512
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:500 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:504 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:508 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:512 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:500 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:504 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:508 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:512 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:496
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:484 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:488 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:492 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:496 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:484 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:488 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:492 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:496 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:480
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:468 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:472 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:476 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:480 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:468 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:472 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:476 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:480 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:464
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:452 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:456 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:460 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:464 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:452 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:456 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:460 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:464 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:448
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:436 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:440 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:444 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:448 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:436 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:440 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:444 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:448 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:432
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:420 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:424 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:428 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:432 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:420 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:424 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:428 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:432 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:416
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:404 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:408 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:412 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:416 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:404 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:408 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:412 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:416 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:400
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:388 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:392 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:396 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:400 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:388 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:392 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:396 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:400 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:384
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:372 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:376 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:380 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:384 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:372 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:376 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:380 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:384 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:368
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:356 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:360 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:364 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:368 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:356 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:360 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:364 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:368 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:352
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:340 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:344 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:348 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:352 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:340 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:344 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:348 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:352 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:336
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:324 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:328 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:332 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:336 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:324 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:328 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:332 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:336 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:320
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:308 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:312 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:316 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:320 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:308 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:312 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:316 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:320 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:304
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:292 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:296 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:300 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:304 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:292 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:296 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:300 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:304 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:288
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:276 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:280 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:284 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:288 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:276 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:280 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:284 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:288 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:272
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:260 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:264 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:268 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:272 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:260 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:264 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:268 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:272 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:256
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:244 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:248 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:252 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:256 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:244 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:248 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:252 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:256 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:240
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:228 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:232 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:236 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:240 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:228 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:232 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:236 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:240 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:224
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:212 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:216 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:220 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:224 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:212 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:216 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:220 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:224 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:208
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:196 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:200 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:204 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:208 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:196 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:200 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:204 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:208 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:192
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:180 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:184 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:188 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:192 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:180 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:184 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:188 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:192 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:176
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:164 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:168 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:172 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:176 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:164 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:168 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:172 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:176 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:160
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:148 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:152 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:156 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:160 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:148 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:152 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:156 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:160 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:144
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:132 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:136 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:140 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:144 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:132 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:136 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:140 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:144 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:128
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:116 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:120 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:124 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:128 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:116 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:120 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:124 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:128 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:112
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:100 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:104 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:108 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:112 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:100 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:104 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:108 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:112 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:96
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:84 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:88 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:92 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:96 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:84 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:88 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:92 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:96 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:80
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:68 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:72 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:76 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:80 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:68 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:72 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:76 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:80 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:64
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:52 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:56 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:60 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:64 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:52 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:56 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:60 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:64 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:48
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:36 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:40 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:44 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:48 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:36 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:40 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:44 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:48 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:32
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:20 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:24 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:28 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:32 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:20 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:24 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:28 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:32 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:16
 ; GFX6-NEXT: s_waitcnt expcnt(0)
-; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:8 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:16 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], 0 offset:4 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], 0 offset:8 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], 0 offset:12 ; 4-byte Folded Reload
+; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], 0 offset:16 ; 4-byte Folded Reload
 ; GFX6-NEXT: s_waitcnt vmcnt(0)
 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64
 ; GFX6-NEXT: s_endpgm
@@ -5009,13 +5002,6 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
 ; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 4
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0x84
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x104
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x184
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x204
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x284
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x304
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x384
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
@@ -5050,1261 +5036,1268 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x84
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0x94
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x94
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xa4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xb4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xc4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xd4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xe4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xf4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0x180
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s5, v2
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x180
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x104
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x114
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x114
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x124
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x124
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x134
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x134
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x144
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x144
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x154
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x154
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x164
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x164
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x174
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x174
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x200
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s6, v2
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x200
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x184
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x194
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x194
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1f4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x280
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s7, v2
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x280
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x204
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x214
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x214
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x224
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x224
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x234
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x234
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x244
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x244
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x254
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x254
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x264
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x264
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x274
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x274
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x300
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s8, v2
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x300
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x284
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x294
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x294
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2f4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x2f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x380
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s9, v2
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x380
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x304
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x314
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x314
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x324
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x324
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x334
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x334
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x344
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x344
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x354
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x354
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x364
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x364
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x374
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x374
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x400
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s10, v2
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x400
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x384
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x394
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x394
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3f4
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x3f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3]
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x404
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x404
 ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, s1
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:16
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x414
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x414
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:32
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x424
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x424
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:48
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x434
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x434
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:64
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x444
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x444
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:80
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x454
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x454
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:96
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x464
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x464
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:112
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x474
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x474
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:128
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x484
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x484
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:144
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x494
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x494
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:160
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:176
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:192
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:208
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:224
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:240
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4f4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x4f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:256
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x504
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x504
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:272
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x514
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x514
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:288
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x524
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x524
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:304
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x534
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x534
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:320
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x544
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x544
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:336
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x554
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x554
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:352
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x564
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x564
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:368
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x574
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x574
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:384
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x584
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x584
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:400
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x594
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x594
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:416
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:432
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:448
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:464
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:480
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:496
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5f4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x5f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:512
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x604
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x604
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:528
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x614
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x614
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:544
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x624
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x624
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:560
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x634
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x634
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:576
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x644
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x644
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:592
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x654
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x654
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:608
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x664
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x664
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:624
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x674
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x674
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:640
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x684
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x684
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:656
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x694
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x694
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:672
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:688
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:704
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:720
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:736
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:752
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6f4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x6f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:768
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x704
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x704
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:784
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x714
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x714
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:800
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x724
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x724
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:816
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x734
+;
GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x734 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:832 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x744 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x744 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:848 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x754 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x754 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:864 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x764 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x764 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:880 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x774 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x774 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:896 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x784 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x784 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:912 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x794 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x794 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:928 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7a4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7a4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:944 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7b4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7b4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:960 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7c4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7c4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: 
global_load_dwordx4 v[0:3], v5, s[2:3] offset:976 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7d4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7d4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:992 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7e4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1008 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7f4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x7f4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1024 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x804 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x804 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1040 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x814 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x814 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1056 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x824 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x824 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1072 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x834 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x834 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1088 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x844 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x844 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1104 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x854 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x854 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1120 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x864 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x864 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill 
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1136 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x874 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x874 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1152 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x884 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x884 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1168 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x894 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x894 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1184 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8a4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8a4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1200 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8b4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8b4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1216 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8c4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8c4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1232 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8d4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8d4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1248 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8e4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1264 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8f4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x8f4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1280 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x904 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x904 ; 
GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1296 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x914 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x914 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1312 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x924 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x924 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1328 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x934 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x934 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1344 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x944 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x944 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1360 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x954 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x954 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1376 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x964 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x964 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1392 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x974 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x974 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1408 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x984 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x984 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1424 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x994 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x994 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, 
s[2:3] offset:1440 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9a4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9a4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1456 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9b4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9b4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1472 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9c4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9c4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1488 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9d4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9d4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1504 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9e4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9e4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1520 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9f4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x9f4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1536 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1552 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1568 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1584 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: 
scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1600 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1616 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1632 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1648 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1664 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1680 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xa94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1696 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xaa4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xaa4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1712 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xab4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xab4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1728 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xac4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xac4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1744 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xad4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xad4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) 
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1760 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xae4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xae4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1776 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xaf4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xaf4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1792 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1808 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1824 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1840 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1856 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1872 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1888 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1904 -; GFX9-FLATSCR-NEXT: 
s_movk_i32 s11, 0xb74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1920 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1936 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xb94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1952 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xba4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xba4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1968 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbc4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbc4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbd4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbd4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbe4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbe4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbf4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xbf4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte 
Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2096 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2112 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2128 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2144 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2160 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2176 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2192 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xc94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2208 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xca4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xca4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, 
v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2224 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2240 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcc4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcc4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2256 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcd4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcd4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2272 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xce4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xce4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2288 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcf4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xcf4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2304 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2320 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2336 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2352 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2368 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd44 +; GFX9-FLATSCR-NEXT: 
s_movk_i32 s4, 0xd44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2384 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2400 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2416 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2432 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2448 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xd94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2464 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xda4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xda4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2480 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2496 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdc4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdc4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2512 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdd4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdd4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: 
global_load_dwordx4 v[0:3], v5, s[2:3] offset:2528 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xde4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xde4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2544 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdf4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xdf4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2560 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2576 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe14 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2592 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2608 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2624 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2640 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2656 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2672 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded 
Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2688 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2704 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xe94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2720 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xea4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xea4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2736 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xeb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xeb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2752 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xec4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xec4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2768 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xed4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xed4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2784 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xee4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xee4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2800 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xef4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xef4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2816 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf04 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf04 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2832 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf14 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf14 ; 
GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2848 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf24 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf24 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2864 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf34 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf34 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2880 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf44 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf44 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2896 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf54 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf54 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2912 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf64 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2928 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf74 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf74 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2944 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf84 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf84 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2960 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf94 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xf94 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2976 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfa4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfa4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, 
s[2:3] offset:2992 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfb4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfb4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3008 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfc4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfc4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3024 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfd4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfd4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3040 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfe4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xfe4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3056 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xff4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0xff4 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3072 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1004 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1004 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3088 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1014 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1014 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3104 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1024 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1024 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3120 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1034 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1034 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3136 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1044 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1044 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; 
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3152
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1054
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1054
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3168
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1064
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1064
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3184
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1074
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1074
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3200
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1084
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1084
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3216
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1094
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1094
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3232
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3248
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3264
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3280
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3296
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3312
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10f4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x10f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3328
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1104
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1104
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3344
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1114
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1114
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3360
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1124
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1124
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3376
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1134
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1134
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3392
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1144
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1144
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3408
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1154
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1154
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3424
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1164
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1164
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3440
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1174
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1174
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3456
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1184
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1184
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3472
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1194
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1194
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3488
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3504
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3520
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3536
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3552
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3568
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11f4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x11f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3584
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1204
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1204
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3600
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1214
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1214
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3616
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1224
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1224
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3632
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1234
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1234
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3648
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1244
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1244
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3664
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1254
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1254
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3680
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1264
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1264
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3696
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1274
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1274
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3712
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1284
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1284
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3728
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1294
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1294
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3744
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3760
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3776
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3792
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3808
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3824
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12f4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x12f4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3840
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1304
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1304
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3856
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1314
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1314
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3872
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1324
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1324
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3888
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1334
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1334
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3904
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1344
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1344
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3920
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1354
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1354
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3936
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1364
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1364
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3952
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1374
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1374
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3968
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1384
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1384
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3984
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1394
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x1394
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4000
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13a4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13a4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4016
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13b4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13b4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4032
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13c4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13c4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4048
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13d4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13d4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4064
-; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13e4
+; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x13e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4080
 ; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x13e4
 ; GFX9-FLATSCR-NEXT: ;;#ASMSTART
@@ -7346,7 +7339,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
 ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3f4
 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s10, v4
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x400, v4
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
@@ -7380,7 +7373,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968
 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s9, v4
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x380, v4
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x364
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
@@ -7414,7 +7407,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968
 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s8, v4
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x300, v4
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
@@ -7448,7 +7441,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968
 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s7, v4
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x280, v4
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x264
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
@@ -7482,7 +7475,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968
 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s6, v4
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x200, v4
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1e4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
@@ -7516,7 +7509,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968
 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s5, v4
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x180, v4
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x164
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
@@ -7550,7 +7543,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968
 ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v4
+; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x100, v4
 ; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xe4
 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
index 2f43cc022afd33..fbe0b156cd9baa 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
@@ -15,126 +15,126 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
 ; CHECK: bb.0..expVert:
 ; CHECK-NEXT: liveins: $sgpr3, $sgpr4, $sgpr5, $sgpr8, $sgpr9, $sgpr10, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr25, $sgpr27, $sgpr31
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef %56.sub0:sgpr_64 = COPY $sgpr31
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr27
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr25
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr5
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr18
- ; CHECK-NEXT: undef %50.sub0:sgpr_64 = COPY $sgpr19
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr20
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr21
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr22
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr23
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr9
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr10
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr8
- ; CHECK-NEXT: undef %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %56, 232, 0 :: (invariant load (s64) from %ir.39, addrspace 4)
- ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc
- ; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc
- ; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 4, implicit-def dead $scc
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:sgpr_64 = COPY $sgpr31
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr27
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr25
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr5
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr18
+ ; CHECK-NEXT: undef [[COPY7:%[0-9]+]].sub0:sgpr_64 = COPY $sgpr19
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr20
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr21
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr22
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr23
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr9
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr10
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr8
+ ; CHECK-NEXT: undef [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 232, 0 :: (invariant load (s64) from %ir.39, addrspace 4)
+ ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 4, implicit-def dead $scc
+ ; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc
+ ; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc
 ; CHECK-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_]], 31, implicit-def dead $scc
 ; CHECK-NEXT: [[S_ASHR_I32_1:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_1]], 31, implicit-def dead $scc
 ; CHECK-NEXT: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc
- ; CHECK-NEXT: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, 65535, implicit-def dead $scc
- ; CHECK-NEXT: undef %130.sub0:sreg_64 = S_ADD_U32 [[COPY5]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: %130.sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %130, 16, 0 :: (invariant load (s128) from %ir.81, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub1:sgpr_128 = S_AND_B32 [[S_LOAD_DWORDX2_IMM]].sub1, 65535, implicit-def dead $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.81, addrspace 4)
 ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (invariant load (s128) from `ptr addrspace(4) undef`, addrspace 4)
 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %132:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
 ; CHECK-NEXT: KILL undef %74:sreg_64
 ; CHECK-NEXT: KILL undef %132:sgpr_128
- ; CHECK-NEXT: KILL %130.sub0, %130.sub1
+ ; CHECK-NEXT: KILL [[S_ADD_U32_]].sub0, [[S_ADD_U32_]].sub1
 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32))
 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: undef %302.sub1:sgpr_128 = S_MOV_B32 0
+ ; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
 ; CHECK-NEXT: KILL undef %89:sgpr_128
 ; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc
 ; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc
 ; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc
- ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY5]], 64, implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY6]], 64, implicit-def $scc
 ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %54:sreg_32, 0, implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %149.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: %149.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %156.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_1]], implicit-def $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %149, 0, 0 :: (invariant load (s128) from %ir.87, addrspace 4)
- ; CHECK-NEXT: %156.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %163.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: %163.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_2:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 0, 0 :: (invariant load (s128) from %ir.87, addrspace 4)
+ ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
 ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %171:sreg_32, 31, implicit-def dead $scc
- ; CHECK-NEXT: undef %176.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], undef %171:sreg_32, implicit-def $scc
- ; CHECK-NEXT: %176.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %183.sub0:sreg_64 = S_ADD_U32 %50.sub0, [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: %183.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %190.sub0:sreg_64 = S_ADD_U32 %50.sub0, [[S_LSHL_B32_1]], implicit-def $scc
- ; CHECK-NEXT: %190.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %200.sub0:sreg_64 = S_ADD_U32 %50.sub0, undef %171:sreg_32, implicit-def $scc
- ; CHECK-NEXT: %200.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 %50.sub0, 224, implicit-def $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], undef %171:sreg_32, implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %171:sreg_32, implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY7]].sub0, 224, implicit-def $scc
 ; CHECK-NEXT: [[S_ADDC_U32_1:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %210.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: %210.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %217.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_1]], implicit-def $scc
- ; CHECK-NEXT: %217.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %224.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: %224.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]]:sreg_32 = S_ADD_U32 %50.sub0, 576, implicit-def $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_9]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_9]], [[S_LSHL_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_9]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY7]].sub0, 576, implicit-def $scc
 ; CHECK-NEXT: [[S_ADDC_U32_2:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %241.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: %241.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %253.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: %253.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %261.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], undef %171:sreg_32, implicit-def $scc
- ; CHECK-NEXT: %261.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %273.sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: %273.sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %286.sub0:sreg_64 = S_ADD_U32 [[COPY7]], [[S_LSHL_B32_1]], implicit-def $scc
- ; CHECK-NEXT: %286.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %293.sub0:sreg_64 = S_ADD_U32 [[COPY7]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: %293.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_13]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_13]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_13]], undef %171:sreg_32, implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
 ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc
 ; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %302, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %302, undef %314:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %302, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %302, 16, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %312:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_MOV_B32_]], 16, 0 :: (dereferenceable invariant load (s32))
 ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %369:sgpr_128, undef %370:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %380:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %156, 0, 0 :: (invariant load (s128) from %ir.92, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %163, 0, 0 :: (invariant load (s128) from %ir.97, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %176, 0, 0 :: (invariant load (s128) from %ir.104, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %183, 0, 0 :: (invariant load (s128) from %ir.109, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %190, 0, 0 :: (invariant load (s128) from %ir.114, addrspace 4)
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %367:sgpr_128, undef %368:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %378:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 0, 0 :: (invariant load (s128) from %ir.92, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 0, 0 :: (invariant load (s128) from %ir.97, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.104, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.109, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.114, addrspace 4)
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %364:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %375:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %362:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %373:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
 ; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc
 ; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc
 ; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM2]], -130, implicit-def dead $scc
 ; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc
- ; CHECK-NEXT: undef %327.sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: %327.sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %335.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_]], implicit-def $scc
- ; CHECK-NEXT: %335.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %343.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %200, 0, 0 :: (invariant load (s128) from %ir.121, addrspace 4)
- ; CHECK-NEXT: %343.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %351.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: %351.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY10]], 4, implicit-def dead $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 0, 0 :: (invariant load (s128) from %ir.121, addrspace 4)
+ ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 4, implicit-def dead $scc
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
 ; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %396:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %394:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32))
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %50, 224, 0 :: (invariant load (s128) from %ir.126, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %210, 0, 0 :: (invariant load (s128) from %ir.127, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.126, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.127, addrspace 4)
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %217, 0, 0 :: (invariant load (s128) from %ir.132, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %224, 0, 0 :: (invariant load (s128) from %ir.137, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.132, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.137, addrspace 4)
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
@@ -146,119 +146,119 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
 ; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -329, implicit-def dead $scc
 ; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -345, implicit-def dead $scc
 ; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM6]], -441, implicit-def dead $scc
- ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], 160, implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_24:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], 160, implicit-def $scc
 ; CHECK-NEXT: [[S_ADDC_U32_3:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %36:sreg_32, 0, implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef %411.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_2]], implicit-def $scc
- ; CHECK-NEXT: %411.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY11]], 4, implicit-def dead $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_25:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_24]], [[S_LSHL_B32_2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_25:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY13]], 4, implicit-def dead $scc
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
 ; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc
- ; CHECK-NEXT: undef %425.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_4]], implicit-def $scc
- ; CHECK-NEXT: %425.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]]:sreg_32 = S_ADD_U32 %56.sub0, 168, implicit-def $scc
+ ; CHECK-NEXT: undef [[S_ADD_U32_26:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_24]], [[S_LSHL_B32_4]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_26:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_ADD_U32_27:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, 168, implicit-def $scc
 ; CHECK-NEXT: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %57:sreg_32, 0, implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %241, 0, 0 :: (invariant load (s128) from %ir.147, addrspace 4)
- ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.147, addrspace 4)
+ ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 3, implicit-def dead $scc
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
 ; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc
- ; CHECK-NEXT: undef %441.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_5]], implicit-def $scc
- ; CHECK-NEXT: %441.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %441, 0, 0 :: (invariant load (s32) from %ir.269, align 8, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %253, 0, 0 :: (invariant load (s128) from %ir.154, addrspace 4)
+ ; CHECK-NEXT: undef [[S_ADD_U32_28:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_5]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADD_U32_28:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_28]], 0, 0 :: (invariant load (s32) from %ir.269, align 8, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load (s128) from %ir.154, addrspace 4)
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %261, 0, 0 :: (invariant load (s128) from %ir.159, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 0, 0 :: (invariant load (s128) from %ir.159, addrspace 4)
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: %71.sub3:sgpr_128 = S_MOV_B32 553734060
- ; CHECK-NEXT: %71.sub2:sgpr_128 = S_MOV_B32 -1
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY %71
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %273, 0, 0 :: (invariant load (s128) from %ir.167, addrspace 4)
- ; CHECK-NEXT: [[COPY13]].sub1:sgpr_128 = COPY %302.sub1
- ; CHECK-NEXT: [[COPY13]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]]
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY13]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub3:sgpr_128 = S_MOV_B32 553734060
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 -1
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 0, 0 :: (invariant load (s128) from %ir.167, addrspace 4)
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub1
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]]
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32))
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %286, 0, 0 :: (invariant load (s128) from %ir.175, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %293, 0, 0 :: (invariant load (s128) from %ir.180, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_18]], 0, 0 :: (invariant load (s128) from %ir.175, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_19]], 0, 0 :: (invariant load (s128) from %ir.180, addrspace 4)
 ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc
+ ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc
 ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
 ; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc
 ; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc
%453.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_6]], implicit-def $scc - ; CHECK-NEXT: %453.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %453, 0, 0 :: (invariant load (s64) from %ir.277, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_29:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_6]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_29:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_29]], 0, 0 :: (invariant load (s64) from %ir.277, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0 :: (invariant load (s128) from %ir.202, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0 :: (invariant load (s128) from %ir.208, addrspace 4) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY %71 - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %343, 0, 0 :: (invariant load (s128) from %ir.213, addrspace 4) - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM]].sub1, 65535, implicit-def dead $scc - ; CHECK-NEXT: [[COPY14]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 - ; CHECK-NEXT: [[COPY14]].sub1:sgpr_128 = COPY [[S_AND_B32_]] - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY14]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %351, 0, 0 :: (invariant load (s128) from %ir.218, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_20]], 0, 0 :: (invariant load (s128) from %ir.202, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_21]], 0, 0 :: (invariant load (s128) from %ir.208, addrspace 4) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 0, 0 :: (invariant load (s128) from %ir.213, addrspace 4) + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc + ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0 + ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_]] + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY16]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 0, 0 :: (invariant load (s128) from %ir.218, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; 
CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc - ; CHECK-NEXT: undef %468.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_7]], implicit-def $scc - ; CHECK-NEXT: %468.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_30:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_7]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_30:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %468, 0, 0 :: (invariant load (s64) from %ir.287, addrspace 4) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY %71 - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc - ; CHECK-NEXT: [[COPY15]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0 - ; CHECK-NEXT: [[COPY15]].sub1:sgpr_128 = COPY [[S_AND_B32_1]] - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0 :: (invariant load (s128) from %ir.253, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: KILL %411.sub0, %411.sub1 - ; CHECK-NEXT: KILL undef %488:sreg_64 - ; CHECK-NEXT: KILL [[COPY15]].sub0_sub1_sub2, [[COPY15]].sub3 - ; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0 :: (invariant load (s128) from %ir.261, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_30]], 0, 0 :: (invariant load (s64) from %ir.287, addrspace 4) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM2]].sub1, 65535, implicit-def dead $scc + ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM2]].sub0 + ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_1]] + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY17]], 0, 0 :: (dereferenceable invariant 
load (s32)) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_25]], 0, 0 :: (invariant load (s128) from %ir.253, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %484:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) undef`, addrspace 4) + ; CHECK-NEXT: KILL [[S_ADD_U32_25]].sub0, [[S_ADD_U32_25]].sub1 + ; CHECK-NEXT: KILL undef %484:sreg_64 + ; CHECK-NEXT: KILL [[COPY17]].sub0_sub1_sub2, [[COPY17]].sub3 + ; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_26]], 0, 0 :: (invariant load (s128) from %ir.261, addrspace 4) ; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc - ; CHECK-NEXT: undef %485.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_8]], implicit-def $scc - ; CHECK-NEXT: %485.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %485, 0, 0 :: (invariant load (s32) from %ir.298, align 8, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_31:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_8]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_31:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_31]], 0, 0 :: (invariant load (s32) from %ir.298, align 8, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]] ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY %71 - ; CHECK-NEXT: [[COPY16]].sub1:sgpr_128 = COPY [[S_AND_B32_2]] - ; CHECK-NEXT: [[COPY16]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM2]] - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY16]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_2]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM2]] + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY18]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_19:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -475, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_20:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -491, implicit-def dead $scc ; CHECK-NEXT: 
[[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -507, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -539, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], 96, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_32:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], 96, implicit-def $scc ; CHECK-NEXT: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef %514.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: %514.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0 :: (invariant load (s128) from %ir.316, addrspace 4) - ; CHECK-NEXT: undef %522.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: %522.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0 :: (invariant load (s128) from %ir.321, addrspace 4) - ; CHECK-NEXT: undef %530.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: %530.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0 :: (invariant load (s128) from %ir.326, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_33:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_32]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_33:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_33]], 0, 0 :: (invariant load (s128) from %ir.316, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_34:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_32]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_34:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_34]], 0, 0 :: (invariant load (s128) from %ir.321, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_35:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_32]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_35:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_35]], 0, 0 :: (invariant load (s128) from %ir.326, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN 
[[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]] ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]] + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]] ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec @@ -363,20 +363,20 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[V_OR_B32_e64_61:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_60]], [[V_ADD_U32_e64_25]], implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -575, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_62:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_61]], [[V_ADD_U32_e64_26]], implicit $exec - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM8:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM8:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -576, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_63:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_62]], [[V_ADD_U32_e64_27]], implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -577, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec - ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %564:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) undef`, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %559:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], [[V_ADD_U32_e64_30]], implicit $exec ; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc ; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec - ; CHECK-NEXT: undef %624.sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec - ; CHECK-NEXT: IMAGE_STORE_V4_V2_gfx10 %624, undef %578:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) + ; CHECK-NEXT: undef [[V_CNDMASK_B32_e64_:%[0-9]+]].sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec + ; CHECK-NEXT: IMAGE_STORE_V4_V2_gfx10 [[V_CNDMASK_B32_e64_]], undef 
%573:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 .expVert: %0 = extractelement <31 x i32> %userData, i64 2 diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll index 24319a639da447..520ec6e24ae3bf 100644 --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -1779,30 +1779,29 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v4, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[3:4] +; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[3:4] ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[3:4] ; GCN-IR-NEXT: v_cndmask_b32_e64 v5, v5, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GCN-IR-NEXT: v_mov_b32_e32 v2, 0 -; GCN-IR-NEXT: s_mov_b64 s[8:9], 0x8000 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v3 -; GCN-IR-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v3 +; GCN-IR-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc +; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 ; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[7:8] -; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[8:9], v2 +; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[4:5], v2 ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, -1, v0 -; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 ; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, -1, v1, vcc ; GCN-IR-NEXT: v_lshr_b64 v[8:9], s[4:5], v7 ; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, 47, v6 diff --git a/llvm/test/CodeGen/AMDGPU/swdev380865.ll b/llvm/test/CodeGen/AMDGPU/swdev380865.ll index 6c98c7def23289..9189cef019cf40 100644 --- a/llvm/test/CodeGen/AMDGPU/swdev380865.ll +++ b/llvm/test/CodeGen/AMDGPU/swdev380865.ll @@ -16,94 +16,43 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce) ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_mov_b64 s[0:1], 0 ; CHECK-NEXT: s_load_dword s2, s[0:1], 0x0 -; CHECK-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane -; CHECK-NEXT: ; kill: killed $sgpr0_sgpr1 -; CHECK-NEXT: s_mov_b32 s7, 0x401c0000 -; CHECK-NEXT: s_mov_b32 s5, 0x40280000 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_writelane_b32 v2, s2, 0 -; CHECK-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; CHECK-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 +; CHECK-NEXT: s_mov_b32 s4, 0 ; CHECK-NEXT: s_mov_b32 s0, 0 -; CHECK-NEXT: s_mov_b32 s1, 0x40140000 -; CHECK-NEXT: s_mov_b32 s1, 0x40180000 -; CHECK-NEXT: v_writelane_b32 v2, s0, 1 -; CHECK-NEXT: v_writelane_b32 v2, s1, 2 -; CHECK-NEXT: s_mov_b32 s1, 0x40220000 -; CHECK-NEXT: v_writelane_b32 v2, s0, 3 -; CHECK-NEXT: v_writelane_b32 v2, s1, 4 -; CHECK-NEXT: s_mov_b32 s1, 0x40240000 -; CHECK-NEXT: v_writelane_b32 v2, s0, 5 -; CHECK-NEXT: v_writelane_b32 v2, s1, 6 -; CHECK-NEXT: s_mov_b32 s1, 0x40260000 -; CHECK-NEXT: v_writelane_b32 v2, s0, 7 +; CHECK-NEXT: s_mov_b32 s5, 0x40280000 
; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-NEXT: v_writelane_b32 v2, s1, 8 -; CHECK-NEXT: v_mov_b32_e32 v1, s3 +; CHECK-NEXT: s_mov_b32 s1, s2 +; CHECK-NEXT: s_mov_b32 s2, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, s6 +; CHECK-NEXT: s_mov_b32 s3, 0x40260000 +; CHECK-NEXT: v_mov_b32_e32 v1, s7 ; CHECK-NEXT: .LBB0_1: ; %for.cond4.preheader ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], 0 -; CHECK-NEXT: s_mov_b32 s2, 0 -; CHECK-NEXT: s_mov_b32 s3, 0x40140000 -; CHECK-NEXT: v_writelane_b32 v2, s0, 9 -; CHECK-NEXT: v_writelane_b32 v2, s6, 10 -; CHECK-NEXT: v_writelane_b32 v2, s7, 11 -; CHECK-NEXT: v_readlane_b32 s6, v2, 1 -; CHECK-NEXT: v_readlane_b32 s7, v2, 2 -; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] -; CHECK-NEXT: s_mov_b32 s1, s7 -; CHECK-NEXT: s_mov_b32 s0, s2 -; CHECK-NEXT: v_writelane_b32 v2, s6, 1 -; CHECK-NEXT: v_writelane_b32 v2, s7, 2 -; CHECK-NEXT: v_readlane_b32 s6, v2, 10 -; CHECK-NEXT: v_readlane_b32 s7, v2, 11 -; CHECK-NEXT: s_mov_b32 s6, s2 -; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[0:1] -; CHECK-NEXT: v_readlane_b32 s0, v2, 3 -; CHECK-NEXT: v_readlane_b32 s1, v2, 4 -; CHECK-NEXT: s_mov_b32 s3, s1 -; CHECK-NEXT: s_mov_b32 s0, 0 -; CHECK-NEXT: s_mov_b32 s1, 0x40140000 -; CHECK-NEXT: s_mov_b32 s2, s0 -; CHECK-NEXT: s_mov_b32 s1, s3 +; CHECK-NEXT: s_mov_b32 s6, 0 +; CHECK-NEXT: s_mov_b32 s7, 0x40140000 +; CHECK-NEXT: s_add_i32 s0, s0, s1 +; CHECK-NEXT: s_cmpk_lt_i32 s0, 0xa00 +; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[6:7] +; CHECK-NEXT: s_mov_b32 s6, 0 +; CHECK-NEXT: s_mov_b32 s7, 0x40180000 +; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[6:7] +; CHECK-NEXT: s_mov_b32 s6, 0 +; CHECK-NEXT: s_mov_b32 s7, 0x401c0000 +; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[6:7] +; CHECK-NEXT: s_mov_b32 s6, 0 +; CHECK-NEXT: s_mov_b32 s7, 0x40220000 +; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[6:7] +; CHECK-NEXT: s_mov_b32 s6, 0 +; CHECK-NEXT: s_mov_b32 s7, 0x40240000 ; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[6:7] -; CHECK-NEXT: v_writelane_b32 v2, s0, 3 -; CHECK-NEXT: v_writelane_b32 v2, s1, 4 -; CHECK-NEXT: v_readlane_b32 s0, v2, 5 -; CHECK-NEXT: v_readlane_b32 s1, v2, 6 -; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] -; CHECK-NEXT: s_mov_b32 s3, s1 -; CHECK-NEXT: s_mov_b32 s0, 0 -; CHECK-NEXT: s_mov_b32 s1, 0x40140000 -; CHECK-NEXT: s_mov_b32 s2, s0 -; CHECK-NEXT: s_mov_b32 s1, s3 -; CHECK-NEXT: v_writelane_b32 v2, s0, 5 -; CHECK-NEXT: v_writelane_b32 v2, s1, 6 -; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] -; CHECK-NEXT: v_readlane_b32 s0, v2, 7 -; CHECK-NEXT: v_readlane_b32 s1, v2, 8 -; CHECK-NEXT: s_mov_b32 s3, s1 -; CHECK-NEXT: s_mov_b32 s0, 0 -; CHECK-NEXT: s_mov_b32 s1, 0x40140000 -; CHECK-NEXT: s_mov_b32 s2, s0 -; CHECK-NEXT: s_mov_b32 s1, s3 ; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3] -; CHECK-NEXT: v_writelane_b32 v2, s0, 7 -; CHECK-NEXT: s_mov_b32 s4, s0 -; CHECK-NEXT: v_writelane_b32 v2, s1, 8 -; CHECK-NEXT: v_readlane_b32 s0, v2, 0 -; CHECK-NEXT: v_readlane_b32 s2, v2, 9 -; CHECK-NEXT: s_add_i32 s2, s2, s0 -; CHECK-NEXT: v_writelane_b32 v2, s2, 9 ; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[4:5] -; CHECK-NEXT: v_readlane_b32 s0, v2, 9 -; CHECK-NEXT: s_cmpk_lt_i32 s0, 0xa00 ; CHECK-NEXT: s_cbranch_scc1 .LBB0_1 ; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup.loopexit +; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: v_mov_b32_e32 v3, 0 -; CHECK-NEXT: v_mov_b32_e32 v4, 0 -; CHECK-NEXT: global_store_dwordx2 v[3:4], v[0:1], off -; CHECK-NEXT: ; kill: killed $vgpr2 +; CHECK-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; 
CHECK-NEXT: s_endpgm entry: %0 = load i32, ptr addrspace(4) null, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll index dcf49de6849240..05de0bc5f282ad 100644 --- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll +++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll @@ -39,68 +39,68 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 ; GLOBALNESS1-NEXT: global_store_dword v[0:1], v42, off ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS1-NEXT: global_load_dword v0, v42, s[76:77] +; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[76:77] ; GLOBALNESS1-NEXT: s_mov_b64 s[40:41], s[4:5] ; GLOBALNESS1-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18 ; GLOBALNESS1-NEXT: s_load_dword s7, s[8:9], 0x20 ; GLOBALNESS1-NEXT: s_add_u32 flat_scratch_lo, s12, s17 ; GLOBALNESS1-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 ; GLOBALNESS1-NEXT: s_add_u32 s0, s0, s17 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS1-NEXT: s_addc_u32 s1, s1, 0 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, 0x40994400 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x40994400 ; GLOBALNESS1-NEXT: s_bitcmp1_b32 s78, 0 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e32 vcc, s[4:5], v[42:43] +; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e32 vcc, s[4:5], v[0:1] ; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[4:5], 0 -; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] ; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] ; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GLOBALNESS1-NEXT: s_bitcmp1_b32 s6, 0 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[42:43], 1, v1 -; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[42:43], 1, v0 +; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GLOBALNESS1-NEXT: s_bitcmp1_b32 s7, 0 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[44:45], 1, v2 -; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[48:49], 1, v0 +; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[6:7] ; GLOBALNESS1-NEXT: s_add_u32 s6, s6, wobble@gotpcrel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s7, s7, wobble@gotpcrel32@hi+12 ; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[48:49], 1, v2 -; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] -; GLOBALNESS1-NEXT: s_load_dwordx2 s[76:77], s[6:7], 0x0 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[50:51], 1, v2 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[50:51], 1, v0 +; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GLOBALNESS1-NEXT: s_load_dwordx2 s[74:75], s[6:7], 0x0 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[52:53], 1, v0 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[44:45], 1, v1 ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[46:47], 1, v3 ; GLOBALNESS1-NEXT: s_mov_b32 s70, s16 ; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[8:9] ; GLOBALNESS1-NEXT: s_mov_b32 s71, s15 ; GLOBALNESS1-NEXT: s_mov_b32 s72, s14 ; GLOBALNESS1-NEXT: s_mov_b64 s[34:35], s[10:11] -; GLOBALNESS1-NEXT: s_mov_b64 s[74:75], 0x80 -; GLOBALNESS1-NEXT: 
v_cmp_ne_u32_e64 s[60:61], 1, v1 ; GLOBALNESS1-NEXT: s_mov_b32 s32, 0 ; GLOBALNESS1-NEXT: ; implicit-def: $vgpr44_vgpr45 ; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) -; GLOBALNESS1-NEXT: v_cmp_gt_i32_e32 vcc, 0, v0 -; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GLOBALNESS1-NEXT: v_cmp_gt_i32_e32 vcc, 1, v0 -; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GLOBALNESS1-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GLOBALNESS1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GLOBALNESS1-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[52:53], 1, v2 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[54:55], 1, v3 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[56:57], 1, v4 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[58:59], 1, v0 +; GLOBALNESS1-NEXT: v_cmp_gt_i32_e32 vcc, 1, v2 +; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GLOBALNESS1-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GLOBALNESS1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[54:55], 1, v0 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[56:57], 1, v1 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[58:59], 1, v3 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[60:61], 1, v2 ; GLOBALNESS1-NEXT: s_branch .LBB1_4 ; GLOBALNESS1-NEXT: .LBB1_1: ; %bb70.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[58:59] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[60:61] ; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_29 ; GLOBALNESS1-NEXT: .LBB1_2: ; %Flow15 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 @@ -115,7 +115,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: .LBB1_4: ; %bb5 ; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1 ; GLOBALNESS1-NEXT: ; Child Loop BB1_16 Depth 2 -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[74:75], s[74:75] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0x80 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0 ; GLOBALNESS1-NEXT: flat_load_dword v40, v[0:1] ; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 ; GLOBALNESS1-NEXT: buffer_store_dword v42, off, s[0:3], 0 @@ -129,7 +130,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[76:77] +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[74:75] ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[46:47] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1 ; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5 @@ -165,12 +166,12 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[62:63], 0, v0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x3ff00000 -; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[80:81], s[62:63] +; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[76:77], s[62:63] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26 ; GLOBALNESS1-NEXT: ; %bb.11: ; %bb33.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[2:3], off -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[52:53] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[54:55] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_13 ; GLOBALNESS1-NEXT: ; %bb.12: ; %bb39.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 @@ -192,16 +193,16 @@ 
define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5] ; GLOBALNESS1-NEXT: .LBB1_15: ; %bb63.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[50:51] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[52:53] ; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_25 ; GLOBALNESS1-NEXT: .LBB1_16: ; %bb44.i ; GLOBALNESS1-NEXT: ; Parent Loop BB1_4 Depth=1 ; GLOBALNESS1-NEXT: ; => This Inner Loop Header: Depth=2 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[60:61] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[48:49] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS1-NEXT: ; %bb.17: ; %bb46.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[48:49] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[50:51] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS1-NEXT: ; %bb.18: ; %bb50.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 @@ -216,7 +217,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[66:67] ; GLOBALNESS1-NEXT: .LBB1_21: ; %spam.exit.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[54:55] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[56:57] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS1-NEXT: ; %bb.22: ; %bb55.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 @@ -230,7 +231,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_mov_b32 s13, s71 ; GLOBALNESS1-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 -; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[76:77] +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[74:75] ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[46:47], 0, 0 ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] @@ -241,7 +242,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: global_store_dwordx2 v[46:47], v[44:45], off -; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[76:77] +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[74:75] ; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[64:65] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_14 ; GLOBALNESS1-NEXT: ; %bb.23: ; %bb62.i @@ -258,12 +259,12 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 ; GLOBALNESS1-NEXT: .LBB1_26: ; %Flow24 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[80:81] +; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[76:77] ; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[62:63] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_2 ; GLOBALNESS1-NEXT: ; %bb.27: ; %bb67.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[56:57] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[58:59] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_1 ; GLOBALNESS1-NEXT: ; %bb.28: ; %bb69.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 @@ -325,68 +326,68 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 ; GLOBALNESS0-NEXT: global_store_dword v[0:1], v42, off ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS0-NEXT: global_load_dword v0, v42, s[72:73] +; GLOBALNESS0-NEXT: 
global_load_dword v2, v42, s[72:73] ; GLOBALNESS0-NEXT: s_mov_b64 s[40:41], s[4:5] ; GLOBALNESS0-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18 ; GLOBALNESS0-NEXT: s_load_dword s7, s[8:9], 0x20 ; GLOBALNESS0-NEXT: s_add_u32 flat_scratch_lo, s12, s17 ; GLOBALNESS0-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 ; GLOBALNESS0-NEXT: s_add_u32 s0, s0, s17 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS0-NEXT: s_addc_u32 s1, s1, 0 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, 0x40994400 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x40994400 ; GLOBALNESS0-NEXT: s_bitcmp1_b32 s74, 0 ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e32 vcc, s[4:5], v[42:43] +; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e32 vcc, s[4:5], v[0:1] ; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[4:5], 0 -; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] ; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] ; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GLOBALNESS0-NEXT: s_bitcmp1_b32 s6, 0 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[42:43], 1, v1 -; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[42:43], 1, v0 +; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GLOBALNESS0-NEXT: s_bitcmp1_b32 s7, 0 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[44:45], 1, v2 -; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[48:49], 1, v0 +; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[6:7] ; GLOBALNESS0-NEXT: s_add_u32 s6, s6, wobble@gotpcrel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s7, s7, wobble@gotpcrel32@hi+12 ; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[48:49], 1, v2 -; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] -; GLOBALNESS0-NEXT: s_load_dwordx2 s[78:79], s[6:7], 0x0 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[50:51], 1, v2 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[50:51], 1, v0 +; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GLOBALNESS0-NEXT: s_load_dwordx2 s[76:77], s[6:7], 0x0 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[52:53], 1, v0 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[44:45], 1, v1 ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[46:47], 1, v3 ; GLOBALNESS0-NEXT: s_mov_b32 s68, s16 ; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[8:9] ; GLOBALNESS0-NEXT: s_mov_b32 s69, s15 ; GLOBALNESS0-NEXT: s_mov_b32 s70, s14 ; GLOBALNESS0-NEXT: s_mov_b64 s[34:35], s[10:11] -; GLOBALNESS0-NEXT: s_mov_b64 s[76:77], 0x80 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[60:61], 1, v1 ; GLOBALNESS0-NEXT: s_mov_b32 s32, 0 ; GLOBALNESS0-NEXT: ; implicit-def: $vgpr44_vgpr45 ; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) -; GLOBALNESS0-NEXT: v_cmp_gt_i32_e32 vcc, 0, v0 -; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GLOBALNESS0-NEXT: v_cmp_gt_i32_e32 vcc, 1, v0 -; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GLOBALNESS0-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GLOBALNESS0-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GLOBALNESS0-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[52:53], 1, v2 -; GLOBALNESS0-NEXT: 
v_cmp_ne_u32_e64 s[54:55], 1, v3 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[56:57], 1, v4 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[58:59], 1, v0 +; GLOBALNESS0-NEXT: v_cmp_gt_i32_e32 vcc, 1, v2 +; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GLOBALNESS0-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GLOBALNESS0-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[54:55], 1, v0 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[56:57], 1, v1 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[58:59], 1, v3 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[60:61], 1, v2 ; GLOBALNESS0-NEXT: s_branch .LBB1_4 ; GLOBALNESS0-NEXT: .LBB1_1: ; %bb70.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[58:59] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[60:61] ; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_29 ; GLOBALNESS0-NEXT: .LBB1_2: ; %Flow15 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 @@ -401,7 +402,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: .LBB1_4: ; %bb5 ; GLOBALNESS0-NEXT: ; =>This Loop Header: Depth=1 ; GLOBALNESS0-NEXT: ; Child Loop BB1_16 Depth 2 -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[76:77], s[76:77] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0x80 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0 ; GLOBALNESS0-NEXT: flat_load_dword v40, v[0:1] ; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 ; GLOBALNESS0-NEXT: buffer_store_dword v42, off, s[0:3], 0 @@ -415,7 +417,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_mov_b32 s14, s68 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[78:79] +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[76:77] ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[46:47] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1 ; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5 @@ -451,12 +453,12 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[62:63], 0, v0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x3ff00000 -; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[80:81], s[62:63] +; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[78:79], s[62:63] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26 ; GLOBALNESS0-NEXT: ; %bb.11: ; %bb33.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[2:3], off -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[52:53] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[54:55] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_13 ; GLOBALNESS0-NEXT: ; %bb.12: ; %bb39.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 @@ -478,16 +480,16 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5] ; GLOBALNESS0-NEXT: .LBB1_15: ; %bb63.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[50:51] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[52:53] ; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_25 ; GLOBALNESS0-NEXT: .LBB1_16: ; %bb44.i ; GLOBALNESS0-NEXT: ; Parent Loop BB1_4 Depth=1 ; GLOBALNESS0-NEXT: ; => This Inner Loop Header: Depth=2 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[60:61] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[48:49] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS0-NEXT: ; %bb.17: ; 
%bb46.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[48:49] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[50:51] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS0-NEXT: ; %bb.18: ; %bb50.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 @@ -502,7 +504,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[66:67] ; GLOBALNESS0-NEXT: .LBB1_21: ; %spam.exit.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[54:55] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[56:57] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS0-NEXT: ; %bb.22: ; %bb55.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 @@ -516,7 +518,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_mov_b32 s13, s69 ; GLOBALNESS0-NEXT: s_mov_b32 s14, s68 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 -; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[78:79] +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[76:77] ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[46:47], 0, 0 ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] @@ -527,7 +529,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_mov_b32 s14, s68 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: global_store_dwordx2 v[46:47], v[44:45], off -; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[78:79] +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[76:77] ; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[64:65] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_14 ; GLOBALNESS0-NEXT: ; %bb.23: ; %bb62.i @@ -544,12 +546,12 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 ; GLOBALNESS0-NEXT: .LBB1_26: ; %Flow24 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[80:81] +; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[78:79] ; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[62:63] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_2 ; GLOBALNESS0-NEXT: ; %bb.27: ; %bb67.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[56:57] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[58:59] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_1 ; GLOBALNESS0-NEXT: ; %bb.28: ; %bb69.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index 7aa36a8b377bff..e809292aad1d38 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -1165,30 +1165,29 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v5, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[4:5] +; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[4:5] ; GCN-IR-NEXT: v_mov_b32_e32 v3, 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[4:5] ; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GCN-IR-NEXT: v_mov_b32_e32 v2, 0 -; GCN-IR-NEXT: s_mov_b64 s[8:9], 0x8000 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB9_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: 
v_add_i32_e32 v7, vcc, 1, v4 -; GCN-IR-NEXT: v_addc_u32_e32 v8, vcc, 0, v5, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v4 +; GCN-IR-NEXT: v_addc_u32_e32 v8, vcc, 0, v5, vcc +; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 ; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[7:8] -; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[8:9], v2 +; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[4:5], v2 ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB9_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, -1, v0 -; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 ; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, -1, v1, vcc ; GCN-IR-NEXT: v_lshr_b64 v[8:9], s[4:5], v7 ; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, 47, v6 diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll index 2f82260888a7da..86e2822a3e5b16 100644 --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -33,8 +33,8 @@ define hidden void @widget() { ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_load_dword v0, v[0:1] -; GCN-NEXT: s_mov_b64 s[16:17], 0 ; GCN-NEXT: s_mov_b64 s[20:21], -1 +; GCN-NEXT: s_mov_b64 s[16:17], 0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0 ; GCN-NEXT: s_mov_b64 s[46:47], 0 @@ -303,12 +303,12 @@ define hidden void @blam() { ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] -; GCN-NEXT: s_mov_b64 s[50:51], 0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: v_and_b32_e32 v2, 0x3ff, v41 -; GCN-NEXT: flat_load_dword v44, v[0:1] ; GCN-NEXT: v_mov_b32_e32 v43, 0 +; GCN-NEXT: flat_load_dword v44, v[0:1] +; GCN-NEXT: s_mov_b64 s[50:51], 0 ; GCN-NEXT: s_getpc_b64 s[52:53] ; GCN-NEXT: s_add_u32 s52, s52, spam@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s53, s53, spam@rel32@hi+12 @@ -329,10 +329,10 @@ define hidden void @blam() { ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: flat_load_dword v0, v[42:43] ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], 0 -; GCN-NEXT: s_mov_b64 s[4:5], -1 +; GCN-NEXT: s_mov_b64 s[6:7], 0 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 2, v0 -; GCN-NEXT: s_mov_b64 s[6:7], 0 +; GCN-NEXT: s_mov_b64 s[4:5], -1 ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GCN-NEXT: s_xor_b64 s[56:57], exec, s[8:9] ; GCN-NEXT: s_cbranch_execz .LBB1_12 diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index 91d09c01639ffc..9c316612528c20 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -1158,30 +1158,29 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v4, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[3:4] +; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[3:4] ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[3:4] ; GCN-IR-NEXT: v_cndmask_b32_e64 v5, v5, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GCN-IR-NEXT: v_mov_b32_e32 v2, 0 -; GCN-IR-NEXT: s_mov_b64 s[8:9], 0x8000 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; 
GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB8_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v3 -; GCN-IR-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v3 +; GCN-IR-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc +; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 ; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[7:8] -; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[8:9], v2 +; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[4:5], v2 ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB8_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, -1, v0 -; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 ; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, -1, v1, vcc ; GCN-IR-NEXT: v_lshr_b64 v[8:9], s[4:5], v7 ; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, 47, v6 diff --git a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll index f1edd5c74b1054..0f32eb1b12771f 100644 --- a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll +++ b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll @@ -257,11 +257,10 @@ define amdgpu_kernel void @test_s0_s1_k_f32(ptr addrspace(1) %out, float %a, flo ; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB0:[0-9]+]], s[[SGPR1_SUB0]] ; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB1:[0-9]+]], s[[SGPR1_SUB1]] -; GCN: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v[[[VS1_SUB0]]:[[VS1_SUB1]]], v[[[VZERO]]:[[VK0_SUB1]]] +; GCN-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v[[[VS1_SUB0]]:[[VS1_SUB1]]], v[[[VZERO]]:[[VK0_SUB1]]] -; Same zero component is re-used for half of each immediate. 
-; GCN: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], 0x40b00000 -; GCN: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v[[[VS1_SUB0]]:[[VS1_SUB1]]], v[[[VZERO]]:[[VK1_SUB1]]] +; GCN-DAG: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], 0x40b00000 +; GCN-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v[[[VS1_SUB0]]:[[VS1_SUB1]]], v[{{[0-9]+}}:[[VK1_SUB1]]] ; GCN: buffer_store_dwordx2 [[RESULT0]] ; GCN: buffer_store_dwordx2 [[RESULT1]] diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll index cc73302f856373..aa65a62e242daa 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll @@ -474,7 +474,7 @@ define amdgpu_kernel void @livevariables_update_missed_block(ptr addrspace(1) %s ; SI-NEXT: successors: %bb.7(0x80000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.src1.kernarg.offset, align 4, addrspace 4) - ; SI-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %51, 0, implicit $exec + ; SI-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %48, 0, implicit $exec ; SI-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed [[V_ADDC_U32_e64_]], %subreg.sub1 ; SI-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8) from %ir.i10, addrspace 1) @@ -502,14 +502,14 @@ define amdgpu_kernel void @livevariables_update_missed_block(ptr addrspace(1) %s ; SI-NEXT: bb.5.Flow: ; SI-NEXT: successors: %bb.1(0x40000000), %bb.7(0x40000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY1]](s32), %bb.0, undef %52:vgpr_32, %bb.6 + ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY1]](s32), %bb.0, undef %49:vgpr_32, %bb.6 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.6.sw.bb18: ; SI-NEXT: successors: %bb.5(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI undef %36:vgpr_32, %bb.3, [[GLOBAL_LOAD_UBYTE1]], %bb.4 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI undef %33:vgpr_32, %bb.3, [[GLOBAL_LOAD_UBYTE1]], %bb.4 ; SI-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec ; SI-NEXT: GLOBAL_STORE_BYTE killed [[V_MOV_B2]], killed [[PHI1]], 0, 0, implicit $exec :: (store (s8) into `ptr addrspace(1) null`, addrspace 1) ; SI-NEXT: S_BRANCH %bb.5 From 72e6c1c70d5e07bbc8cb7cae2ed915108daf93aa Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 30 Oct 2023 15:17:00 +0000 Subject: [PATCH 028/144] [RISCV] Begin moving post-isel vector peepholes to a MF pass (#70342) We currently have three postprocess peephole optimisations for vector pseudos: 1) Masked pseudo with all ones mask -> unmasked pseudo 2) Merge vmerge pseudo into operand pseudo's mask 3) vmerge pseudo with all ones mask -> vmv.v.v pseudo This patch aims to move these peepholes out of SelectionDAG and into a separate 
RISCVFoldMasks MachineFunction pass. There are a few motivations for doing this:

* The current SelectionDAG implementation operates on MachineSDNodes, which are essentially MachineInstrs but require a bunch of logic to reason about chain and glue operands. The RISCVII::has*Op helper functions also don't exactly line up with the SDNode operands. Mutating these pseudos and their operands in place becomes a good bit easier at the MachineInstr level. For example, we would no longer need to check for cycles in the DAG during performCombineVMergeAndVOps.

* Although it's further down the line, moving this code out of SelectionDAG allows it to be reused by GlobalISel later on.

* In performCombineVMergeAndVOps, it may be possible to commute the operands to enable folding in more cases (see test/CodeGen/RISCV/rvv/vmadd-vp.ll). There is existing machinery to commute operands in TII::commuteInstruction, but it's implemented on MachineInstrs.

The pass runs straight after ISel, before any of the other machine SSA optimization passes run. This is so that dead-mi-elimination can mop up any vmsets that are no longer used (but if preferred we could try and erase them from inside RISCVFoldMasks itself). This also means that these peepholes are no longer run at codegen -O0, so this patch isn't strictly NFC.

Only the performVMergeToVMv peephole is refactored in this patch; the remaining two will be implemented later. And as noted by @preames, it should be possible to move doPeepholeSExtW out of SelectionDAG as well.
---
 llvm/lib/Target/RISCV/CMakeLists.txt         |   1 +
 llvm/lib/Target/RISCV/RISCV.h                |   3 +
 llvm/lib/Target/RISCV/RISCVFoldMasks.cpp     | 174 ++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp  |  36 ----
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp |   4 +
 llvm/test/CodeGen/RISCV/O3-pipeline.ll       |   1 +
 .../RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir |  71 +++++++
 7 files changed, 254 insertions(+), 36 deletions(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir

diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 4d5fa79389ea68..b0282b72c6a8db 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -33,6 +33,7 @@ add_llvm_target(RISCVCodeGen
   RISCVMakeCompressible.cpp
   RISCVExpandAtomicPseudoInsts.cpp
   RISCVExpandPseudoInsts.cpp
+  RISCVFoldMasks.cpp
   RISCVFrameLowering.cpp
   RISCVGatherScatterLowering.cpp
   RISCVInsertVSETVLI.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 3d8e33dc716ea4..4e870d444120c2 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -45,6 +45,9 @@ void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
 FunctionPass *createRISCVGatherScatterLoweringPass();
 void initializeRISCVGatherScatterLoweringPass(PassRegistry &);
 
+FunctionPass *createRISCVFoldMasksPass();
+void initializeRISCVFoldMasksPass(PassRegistry &);
+
 FunctionPass *createRISCVOptWInstrsPass();
 void initializeRISCVOptWInstrsPass(PassRegistry &);
 
diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
new file mode 100644
index 00000000000000..d1c77a6cc7756d
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -0,0 +1,174 @@
+//===- RISCVFoldMasks.cpp - MI Vector Pseudo Mask Peepholes ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// This pass performs various peephole optimisations that fold masks into vector +// pseudo instructions after instruction selection. +// +// Currently it converts +// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew +// -> +// PseudoVMV_V_V %false, %true, %vl, %sew +// +//===---------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-fold-masks" + +namespace { + +class RISCVFoldMasks : public MachineFunctionPass { +public: + static char ID; + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + const TargetRegisterInfo *TRI; + RISCVFoldMasks() : MachineFunctionPass(ID) { + initializeRISCVFoldMasksPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } + + StringRef getPassName() const override { return "RISC-V Fold Masks"; } + +private: + bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef); + + bool isAllOnesMask(MachineInstr *MaskCopy); +}; + +} // namespace + +char RISCVFoldMasks::ID = 0; + +INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false) + +bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskCopy) { + if (!MaskCopy) + return false; + assert(MaskCopy->isCopy() && MaskCopy->getOperand(0).getReg() == RISCV::V0); + Register SrcReg = + TRI->lookThruCopyLike(MaskCopy->getOperand(1).getReg(), MRI); + if (!SrcReg.isVirtual()) + return false; + MachineInstr *SrcDef = MRI->getVRegDef(SrcReg); + if (!SrcDef) + return false; + + // TODO: Check that the VMSET is the expected bitwidth? The pseudo has + // undefined behaviour if it's the wrong bitwidth, so we could choose to + // assume that it's all-ones? Same applies to its VL. + switch (SrcDef->getOpcode()) { + case RISCV::PseudoVMSET_M_B1: + case RISCV::PseudoVMSET_M_B2: + case RISCV::PseudoVMSET_M_B4: + case RISCV::PseudoVMSET_M_B8: + case RISCV::PseudoVMSET_M_B16: + case RISCV::PseudoVMSET_M_B32: + case RISCV::PseudoVMSET_M_B64: + return true; + default: + return false; + } +} + +// Transform (VMERGE_VVM_ false, false, true, allones, vl, sew) to +// (VMV_V_V_ false, true, vl, sew). It may decrease uses of VMSET. 
+bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) { +#define CASE_VMERGE_TO_VMV(lmul) \ + case RISCV::PseudoVMERGE_VVM_##lmul: \ + NewOpc = RISCV::PseudoVMV_V_V_##lmul; \ + break; + unsigned NewOpc; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Expected VMERGE_VVM_ instruction."); + CASE_VMERGE_TO_VMV(MF8) + CASE_VMERGE_TO_VMV(MF4) + CASE_VMERGE_TO_VMV(MF2) + CASE_VMERGE_TO_VMV(M1) + CASE_VMERGE_TO_VMV(M2) + CASE_VMERGE_TO_VMV(M4) + CASE_VMERGE_TO_VMV(M8) + } + + Register MergeReg = MI.getOperand(1).getReg(); + Register FalseReg = MI.getOperand(2).getReg(); + // Check merge == false (or merge == undef) + if (MergeReg != RISCV::NoRegister && TRI->lookThruCopyLike(MergeReg, MRI) != + TRI->lookThruCopyLike(FalseReg, MRI)) + return false; + + assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0); + if (!isAllOnesMask(V0Def)) + return false; + + MI.setDesc(TII->get(NewOpc)); + MI.removeOperand(1); // Merge operand + MI.tieOperands(0, 1); // Tie false to dest + MI.removeOperand(3); // Mask operand + MI.addOperand( + MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)); + + // vmv.v.v doesn't have a mask operand, so we may be able to inflate the + // register class for the destination and merge operands e.g. VRNoV0 -> VR + MRI->recomputeRegClass(MI.getOperand(0).getReg()); + MRI->recomputeRegClass(MI.getOperand(1).getReg()); + return true; +} + +bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + // Skip if the vector extension is not enabled. + const RISCVSubtarget &ST = MF.getSubtarget(); + if (!ST.hasVInstructions()) + return false; + + TII = ST.getInstrInfo(); + MRI = &MF.getRegInfo(); + TRI = MRI->getTargetRegisterInfo(); + + bool Changed = false; + + // Masked pseudos coming out of isel will have their mask operand in the form: + // + // $v0:vr = COPY %mask:vr + // %x:vr = Pseudo_MASK %a:vr, %b:br, $v0:vr + // + // Because $v0 isn't in SSA, keep track of it so we can check the mask operand + // on each pseudo. + MachineInstr *CurrentV0Def; + for (MachineBasicBlock &MBB : MF) { + CurrentV0Def = nullptr; + for (MachineInstr &MI : MBB) { + unsigned BaseOpc = RISCV::getRVVMCOpcode(MI.getOpcode()); + if (BaseOpc == RISCV::VMERGE_VVM) + Changed |= convertVMergeToVMv(MI, CurrentV0Def); + + if (MI.definesRegister(RISCV::V0, TRI)) + CurrentV0Def = &MI; + } + } + + return Changed; +} + +FunctionPass *llvm::createRISCVFoldMasksPass() { return new RISCVFoldMasks(); } diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 94d19948953258..c2cac993fe13c4 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3685,40 +3685,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { return true; } -// Transform (VMERGE_VVM_ false, false, true, allones, vl, sew) to -// (VMV_V_V_ false, true, vl, sew). It may decrease uses of VMSET. 
-bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) { -#define CASE_VMERGE_TO_VMV(lmul) \ - case RISCV::PseudoVMERGE_VVM_##lmul: \ - NewOpc = RISCV::PseudoVMV_V_V_##lmul; \ - break; - unsigned NewOpc; - switch (N->getMachineOpcode()) { - default: - llvm_unreachable("Expected VMERGE_VVM_ instruction."); - CASE_VMERGE_TO_VMV(MF8) - CASE_VMERGE_TO_VMV(MF4) - CASE_VMERGE_TO_VMV(MF2) - CASE_VMERGE_TO_VMV(M1) - CASE_VMERGE_TO_VMV(M2) - CASE_VMERGE_TO_VMV(M4) - CASE_VMERGE_TO_VMV(M8) - } - - if (!usesAllOnesMask(N, /* MaskOpIdx */ 3)) - return false; - - SDLoc DL(N); - SDValue PolicyOp = - CurDAG->getTargetConstant(/*TUMU*/ 0, DL, Subtarget->getXLenVT()); - SDNode *Result = CurDAG->getMachineNode( - NewOpc, DL, N->getValueType(0), - {N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5), - PolicyOp}); - ReplaceUses(N, Result); - return true; -} - bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { bool MadeChange = false; SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); @@ -3730,8 +3696,6 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { if (IsVMerge(N) || IsVMv(N)) MadeChange |= performCombineVMergeAndVOps(N); - if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1)) - MadeChange |= performVMergeToVMv(N); } return MadeChange; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 953ac097b91504..85683a3adc968d 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -101,6 +101,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVOptWInstrsPass(*PR); initializeRISCVPreRAExpandPseudoPass(*PR); initializeRISCVExpandPseudoPass(*PR); + initializeRISCVFoldMasksPass(*PR); initializeRISCVInsertVSETVLIPass(*PR); initializeRISCVInsertReadWriteCSRPass(*PR); initializeRISCVDAGToDAGISelPass(*PR); @@ -414,7 +415,10 @@ void RISCVPassConfig::addPreEmitPass2() { } void RISCVPassConfig::addMachineSSAOptimization() { + addPass(createRISCVFoldMasksPass()); + TargetPassConfig::addMachineSSAOptimization(); + if (EnableMachineCombiner) addPass(&MachineCombinerID); diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index cf0826096bd41f..414b721661021f 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -82,6 +82,7 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions +; CHECK-NEXT: RISC-V Fold Masks ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Early Tail Duplication ; CHECK-NEXT: Optimize machine instruction PHIs diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir new file mode 100644 index 00000000000000..442419efb83caf --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir @@ -0,0 +1,71 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-fold-masks \ +# RUN: -verify-machineinstrs | FileCheck %s + +--- +name: undef_passthru +body: | + bb.0: + liveins: $x1, $v8, $v9 + ; CHECK-LABEL: name: undef_passthru + ; CHECK: liveins: $x1, $v8, $v9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %false:vr = COPY $v8 + ; CHECK-NEXT: %true:vr = COPY $v9 + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: 
%mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
+ ; CHECK-NEXT: $v0 = COPY %mask
+ ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5 /* e32 */, 0 /* tu, mu */
+ %false:vr = COPY $v8
+ %true:vr = COPY $v9
+ %avl:gprnox0 = COPY $x1
+ %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5
+ $v0 = COPY %mask
+ %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5
+...
+---
+name: undef_false
+body: |
+ bb.0:
+ liveins: $x1, $v8, $v9
+ ; CHECK-LABEL: name: undef_false
+ ; CHECK: liveins: $x1, $v8, $v9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %pt:vrnov0 = COPY $v8
+ ; CHECK-NEXT: %false:vr = COPY $noreg
+ ; CHECK-NEXT: %true:vr = COPY $v9
+ ; CHECK-NEXT: %avl:gprnox0 = COPY $x1
+ ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
+ ; CHECK-NEXT: $v0 = COPY %mask
+ ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5 /* e32 */
+ %pt:vrnov0 = COPY $v8
+ %false:vr = COPY $noreg
+ %true:vr = COPY $v9
+ %avl:gprnox0 = COPY $x1
+ %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5
+ $v0 = COPY %mask
+ %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5
+...
+---
+name: equal_passthru_false
+body: |
+ bb.0:
+ liveins: $x1, $v8, $v9
+ ; CHECK-LABEL: name: equal_passthru_false
+ ; CHECK: liveins: $x1, $v8, $v9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %false:vr = COPY $v8
+ ; CHECK-NEXT: %pt:vrnov0 = COPY $v8
+ ; CHECK-NEXT: %true:vr = COPY $v9
+ ; CHECK-NEXT: %avl:gprnox0 = COPY $x1
+ ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
+ ; CHECK-NEXT: $v0 = COPY %mask
+ ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5 /* e32 */, 0 /* tu, mu */
+ %false:vr = COPY $v8
+ %pt:vrnov0 = COPY $v8
+ %true:vr = COPY $v9
+ %avl:gprnox0 = COPY $x1
+ %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5
+ $v0 = COPY %mask
+ %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5
+...

From 849297c97d9e87584cae7c83fcca9686f784d54a Mon Sep 17 00:00:00 2001
From: Jessica Del <50999226+OutOfCache@users.noreply.github.com>
Date: Mon, 30 Oct 2023 16:23:49 +0100
Subject: [PATCH 029/144] [AMDGPU][wmma] - Add tied wmma intrinsic (#69903)

These new intrinsics, `amdgcn_wmma_f16_16x16x16_f16_tied` and
`amdgcn_wmma_bf16_16x16x16_bf16_tied`, explicitly tie the destination
accumulator matrix to the input accumulator matrix.

The `wmma_f16` and `wmma_bf16` intrinsics only write 16 bits of each
32-bit destination VGPR; which half is determined via the `op_sel`
argument. The other half of the destination registers remains
unchanged. In some cases, however, we expect the destination to copy
the other halves from the input accumulator, for instance when packing
two separate accumulator matrices into one. In that case, the two
matrices are tied to the same registers but occupy separate halves, so
it is important to copy the other matrix's values to the new
destination.
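As a plain C++ model of the per-lane semantics described above (a
hedged sketch for illustration only; WmmaLaneUntied and WmmaLaneTied
are hypothetical helpers, not code from this patch), the tied form
differs from the untied form only in what happens to the 16-bit half
that op_sel does not select:

  #include <cstdint>

  // Untied: only the op_sel-selected half of the 32-bit lane is
  // defined; model the undefined other half as zero here.
  uint32_t WmmaLaneUntied(uint32_t /*accIn*/, uint16_t result, bool opSelHi) {
    return opSelHi ? uint32_t(result) << 16 : uint32_t(result);
  }

  // Tied: the unselected half is copied through from the input
  // accumulator, which is what makes packing two matrices into the
  // same registers work.
  uint32_t WmmaLaneTied(uint32_t accIn, uint16_t result, bool opSelHi) {
    return opSelHi ? (accIn & 0x0000FFFFu) | (uint32_t(result) << 16)
                   : (accIn & 0xFFFF0000u) | uint32_t(result);
  }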
--- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 8 ++ .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 + llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 44 ++++---- .../AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll | 100 ++++++++++++++++++ .../AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll | 84 +++++++++++++++ .../CodeGen/AMDGPU/llvm.amdgcn.wmma_32.ll | 100 ++++++++++++++++++ .../CodeGen/AMDGPU/llvm.amdgcn.wmma_64.ll | 84 +++++++++++++++ 7 files changed, 404 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 5f1d1d932f74cb..89c7b6ab9ee433 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2361,8 +2361,16 @@ class AMDGPUWmmaIntrinsicIU : def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic; def int_amdgcn_wmma_f32_16x16x16_bf16 : AMDGPUWmmaIntrinsic; +// The regular, untied f16/bf16 wmma intrinsics only write to one half +// of the registers (set via the op_sel bit). +// The content of the other 16-bit of the registers is undefined. def int_amdgcn_wmma_f16_16x16x16_f16 : AMDGPUWmmaIntrinsicOPSEL; def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL; +// The tied versions of the f16/bf16 wmma intrinsics tie the destination matrix +// registers to the input accumulator registers. +// Essentially, the content of the other 16-bit is preserved from the input. +def int_amdgcn_wmma_f16_16x16x16_f16_tied : AMDGPUWmmaIntrinsicOPSEL; +def int_amdgcn_wmma_bf16_16x16x16_bf16_tied : AMDGPUWmmaIntrinsicOPSEL; def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU; def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 5b056bd9e5dba2..e409a24007a6b2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4279,6 +4279,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_sudot8: case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16: case Intrinsic::amdgcn_wmma_f16_16x16x16_f16: + case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied: + case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied: case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16: case Intrinsic::amdgcn_wmma_f32_16x16x16_f16: case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4: diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index b4149729d50e56..24d7550e2dec49 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -865,22 +865,26 @@ def WMMAOpcode3AddrMappingTable : WMMAMappingTable { // it converts the default pseudo to the pseudo where src2 is not the same as vdst. // 3) @earlyclobber on the destination satisfies the constraint during RA. 
-multiclass WMMAInst { +multiclass WMMAInst { defvar WMMAConstraints2Addr = "@earlyclobber $vdst,$vdst = $src2"; defvar WMMAConstraints3Addr = "@earlyclobber $vdst"; defvar WMMAProfile = VOPProfileWMMA; let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in { - let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = 1 in { + let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = convertibleTo3Addr in { def _twoaddr # Suffix : VOP3P_Pseudo; } - let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in { - def _threeaddr # Suffix : VOP3P_Pseudo; - } } - def : WMMAOpcodeMapping(NAME # _twoaddr # Suffix), + if convertibleTo3Addr then { + let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in { + let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in { + def _threeaddr # Suffix : VOP3P_Pseudo; + } + } + def : WMMAOpcodeMapping(NAME # _twoaddr # Suffix), !cast(NAME # _threeaddr # Suffix)>; + } if !eq(Type, WMMAOpSel) then { def : WMMAOpSelPat(NAME # _twoaddr # Suffix), node, P>; @@ -893,21 +897,25 @@ multiclass WMMAInst; - defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_bf16", VOP_V8F32_V16I16_V16I16_V8F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular>; - defm V_WMMA_F16_16X16X16_F16 : WMMAInst<"_w32", "v_wmma_f16_16x16x16_f16", VOP_V16F16_V16F16_V16F16_V16F16, int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel>; - defm V_WMMA_BF16_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_bf16_16x16x16_bf16", VOP_V16I16_V16I16_V16I16_V16I16, int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel>; - defm V_WMMA_I32_16X16X16_IU8 : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu8", VOP_V8I32_V4I32_V4I32_V8I32, int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp>; - defm V_WMMA_I32_16X16X16_IU4 : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu4", VOP_V8I32_V2I32_V2I32_V8I32, int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp>; + defm V_WMMA_F32_16X16X16_F16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_f16", VOP_V8F32_V16F16_V16F16_V8F32, int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular, 1>; + defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_bf16", VOP_V8F32_V16I16_V16I16_V8F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular, 1>; + defm V_WMMA_F16_16X16X16_F16 : WMMAInst<"_w32", "v_wmma_f16_16x16x16_f16", VOP_V16F16_V16F16_V16F16_V16F16, int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel, 1>; + defm V_WMMA_BF16_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_bf16_16x16x16_bf16", VOP_V16I16_V16I16_V16I16_V16I16, int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel, 1>; + defm V_WMMA_F16_16X16X16_F16_TIED : WMMAInst<"_w32", "v_wmma_f16_16x16x16_f16", VOP_V16F16_V16F16_V16F16_V16F16, int_amdgcn_wmma_f16_16x16x16_f16_tied, VRegSrc_256, WMMAOpSel, 0>; + defm V_WMMA_BF16_16X16X16_BF16_TIED : WMMAInst<"_w32", "v_wmma_bf16_16x16x16_bf16", VOP_V16I16_V16I16_V16I16_V16I16, int_amdgcn_wmma_bf16_16x16x16_bf16_tied, VRegSrc_256, WMMAOpSel, 0>; + defm V_WMMA_I32_16X16X16_IU8 : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu8", VOP_V8I32_V4I32_V4I32_V8I32, int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp, 1>; + defm V_WMMA_I32_16X16X16_IU4 : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu4", VOP_V8I32_V2I32_V2I32_V8I32, int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp, 1>; } let WaveSizePredicate = isWave64 in { - defm V_WMMA_F32_16X16X16_F16 : WMMAInst<"_w64", "v_wmma_f32_16x16x16_f16", VOP_V4F32_V16F16_V16F16_V4F32, 
int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular>; - defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w64", "v_wmma_f32_16x16x16_bf16", VOP_V4F32_V16I16_V16I16_V4F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular>; - defm V_WMMA_F16_16X16X16_F16 : WMMAInst<"_w64", "v_wmma_f16_16x16x16_f16", VOP_V8F16_V16F16_V16F16_V8F16, int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel>; - defm V_WMMA_BF16_16X16X16_BF16 : WMMAInst<"_w64", "v_wmma_bf16_16x16x16_bf16", VOP_V8I16_V16I16_V16I16_V8I16, int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel>; - defm V_WMMA_I32_16X16X16_IU8 : WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu8", VOP_V4I32_V4I32_V4I32_V4I32, int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp>; - defm V_WMMA_I32_16X16X16_IU4 : WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu4", VOP_V4I32_V2I32_V2I32_V4I32, int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp>; + defm V_WMMA_F32_16X16X16_F16 : WMMAInst<"_w64", "v_wmma_f32_16x16x16_f16", VOP_V4F32_V16F16_V16F16_V4F32, int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular, 1>; + defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w64", "v_wmma_f32_16x16x16_bf16", VOP_V4F32_V16I16_V16I16_V4F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular, 1>; + defm V_WMMA_F16_16X16X16_F16 : WMMAInst<"_w64", "v_wmma_f16_16x16x16_f16", VOP_V8F16_V16F16_V16F16_V8F16, int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel, 1>; + defm V_WMMA_BF16_16X16X16_BF16 : WMMAInst<"_w64", "v_wmma_bf16_16x16x16_bf16", VOP_V8I16_V16I16_V16I16_V8I16, int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel, 1>; + defm V_WMMA_F16_16X16X16_F16_TIED : WMMAInst<"_w64", "v_wmma_f16_16x16x16_f16", VOP_V8F16_V16F16_V16F16_V8F16, int_amdgcn_wmma_f16_16x16x16_f16_tied, VRegSrc_256, WMMAOpSel, 0>; + defm V_WMMA_BF16_16X16X16_BF16_TIED : WMMAInst<"_w64", "v_wmma_bf16_16x16x16_bf16", VOP_V8I16_V16I16_V16I16_V8I16, int_amdgcn_wmma_bf16_16x16x16_bf16_tied, VRegSrc_256, WMMAOpSel, 0>; + defm V_WMMA_I32_16X16X16_IU8 : WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu8", VOP_V4I32_V4I32_V4I32_V4I32, int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp, 1>; + defm V_WMMA_I32_16X16X16_IU4 : WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu4", VOP_V4I32_V2I32_V2I32_V4I32, int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp, 1>; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll index 6ca2dd838d37ac..2ddf367f0aafc2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll @@ -4,7 +4,9 @@ declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half>, <16 x half> , <8 x float>) declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<16 x i16>, <16 x i16> , <8 x float>) declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half>, <16 x half> , <16 x half>, i1 immarg) +declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half>, <16 x half> , <16 x half>, i1 immarg) declare <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16>, <16 x i16> , <16 x i16>, i1 immarg) +declare <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16>, <16 x i16> , <16 x i16>, i1 immarg) declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 immarg, <4 x i32>, i1 immarg, <4 x i32> , <8 x i32>, i1 immarg) declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, <2 x i32>, i1 immarg, <2 x i32> , <8 x i32>, i1 immarg) @@ -78,6 +80,55 @@ bb: ret void } +define amdgpu_ps void 
@test_wmma_f16_16x16x16_f16_untied(<16 x half> %A.0, <16 x half> %B.0, <16 x half> %A.1, <16 x half> %B.1, <16 x half> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W32-LABEL: test_wmma_f16_16x16x16_f16_untied: +; W32: ; %bb.0: ; %bb +; W32-NEXT: v_wmma_f16_16x16x16_f16 v[44:51], v[0:7], v[8:15], v[32:39] +; W32-NEXT: v_wmma_f16_16x16x16_f16 v[32:39], v[16:23], v[24:31], v[32:39] +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[40:41], v[44:47], off +; W32-NEXT: global_store_b128 v[40:41], v[48:51], off offset:16 +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[42:43], v[32:35], off +; W32-NEXT: global_store_b128 v[42:43], v[36:39], off offset:16 +; W32-NEXT: s_nop 0 +; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W32-NEXT: s_endpgm +bb: + %res.0 = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A.0, <16 x half> %B.0, <16 x half> %C, i1 0) + %res.1 = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A.1, <16 x half> %B.1, <16 x half> %C, i1 0) + store <16 x half> %res.0, ptr addrspace(1) %out.0, align 32 + store <16 x half> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_tied(<16 x half> %A.0, <16 x half> %B.0, <16 x half> %A.1, <16 x half> %B.1, <16 x half> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W32-LABEL: test_wmma_f16_16x16x16_f16_tied: +; W32: ; %bb.0: ; %bb +; W32-NEXT: v_dual_mov_b32 v51, v39 :: v_dual_mov_b32 v50, v38 +; W32-NEXT: v_dual_mov_b32 v49, v37 :: v_dual_mov_b32 v48, v36 +; W32-NEXT: v_dual_mov_b32 v47, v35 :: v_dual_mov_b32 v46, v34 +; W32-NEXT: v_dual_mov_b32 v45, v33 :: v_dual_mov_b32 v44, v32 +; W32-NEXT: v_wmma_f16_16x16x16_f16 v[32:39], v[16:23], v[24:31], v[32:39] +; W32-NEXT: s_delay_alu instid0(VALU_DEP_2) +; W32-NEXT: v_wmma_f16_16x16x16_f16 v[44:51], v[0:7], v[8:15], v[44:51] +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[40:41], v[44:47], off +; W32-NEXT: global_store_b128 v[40:41], v[48:51], off offset:16 +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[42:43], v[32:35], off +; W32-NEXT: global_store_b128 v[42:43], v[36:39], off offset:16 +; W32-NEXT: s_nop 0 +; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W32-NEXT: s_endpgm +bb: + %res.0 = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half> %A.0, <16 x half> %B.0, <16 x half> %C, i1 0) + %res.1 = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half> %A.1, <16 x half> %B.1, <16 x half> %C, i1 0) + store <16 x half> %res.0, ptr addrspace(1) %out.0, align 32 + store <16 x half> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + ; @llvm.amdgcn.wmma.bf16.16x16x16.bf16 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, ptr addrspace(1) %out) { @@ -112,6 +163,55 @@ bb: ret void } +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_untied(<16 x i16> %A.0, <16 x i16> %B.0, <16 x i16> %A.1, <16 x i16> %B.1, <16 x i16> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W32-LABEL: test_wmma_bf16_16x16x16_bf16_untied: +; W32: ; %bb.0: ; %bb +; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[44:51], v[0:7], v[8:15], v[32:39] +; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[32:39], v[16:23], v[24:31], v[32:39] +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[40:41], v[44:47], off +; W32-NEXT: global_store_b128 v[40:41], v[48:51], off offset:16 +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[42:43], v[32:35], off +; W32-NEXT: global_store_b128 
v[42:43], v[36:39], off offset:16 +; W32-NEXT: s_nop 0 +; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W32-NEXT: s_endpgm +bb: + %res.0 = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A.0, <16 x i16> %B.0, <16 x i16> %C, i1 0) + %res.1 = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A.1, <16 x i16> %B.1, <16 x i16> %C, i1 0) + store <16 x i16> %res.0, ptr addrspace(1) %out.0, align 32 + store <16 x i16> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_tied(<16 x i16> %A.0, <16 x i16> %B.0, <16 x i16> %A.1, <16 x i16> %B.1, <16 x i16> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W32-LABEL: test_wmma_bf16_16x16x16_bf16_tied: +; W32: ; %bb.0: ; %bb +; W32-NEXT: v_dual_mov_b32 v51, v39 :: v_dual_mov_b32 v50, v38 +; W32-NEXT: v_dual_mov_b32 v49, v37 :: v_dual_mov_b32 v48, v36 +; W32-NEXT: v_dual_mov_b32 v47, v35 :: v_dual_mov_b32 v46, v34 +; W32-NEXT: v_dual_mov_b32 v45, v33 :: v_dual_mov_b32 v44, v32 +; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[32:39], v[16:23], v[24:31], v[32:39] +; W32-NEXT: s_delay_alu instid0(VALU_DEP_2) +; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[44:51], v[0:7], v[8:15], v[44:51] +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[40:41], v[44:47], off +; W32-NEXT: global_store_b128 v[40:41], v[48:51], off offset:16 +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[42:43], v[32:35], off +; W32-NEXT: global_store_b128 v[42:43], v[36:39], off offset:16 +; W32-NEXT: s_nop 0 +; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W32-NEXT: s_endpgm +bb: + %res.0 = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16> %A.0, <16 x i16> %B.0, <16 x i16> %C, i1 0) + %res.1 = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16> %A.1, <16 x i16> %B.1, <16 x i16> %C, i1 0) + store <16 x i16> %res.0, ptr addrspace(1) %out.0, align 32 + store <16 x i16> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + ; @llvm.amdgcn.wmma.i32.16x16x16.iu8 define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll index a18d0a569bfb6e..09702ae5cb608f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll @@ -4,7 +4,9 @@ declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half>, <16 x half>, <4 x float>) declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<16 x i16>, <16 x i16>, <4 x float>) declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half>, <16 x half>, <8 x half>, i1 immarg) +declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half>, <16 x half>, <8 x half>, i1 immarg) declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16>, <16 x i16>, <8 x i16>, i1 immarg) +declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16>, <16 x i16>, <8 x i16>, i1 immarg) declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 immarg, <4 x i32>, i1 immarg, <4 x i32>, <4 x i32>, i1 immarg) declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <4 x i32>, i1 immarg) @@ -70,6 +72,47 @@ bb: ret void } +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_untied(<16 x half> %A.0, <16 x half> %B.0, <16 x half> %A.1, <16 x half> %B.1, <8 x half> %C, ptr addrspace(1) %out.0, ptr 
addrspace(1) %out.1) { +; W64-LABEL: test_wmma_f16_16x16x16_f16_untied: +; W64: ; %bb.0: ; %bb +; W64-NEXT: v_wmma_f16_16x16x16_f16 v[40:43], v[0:7], v[8:15], v[32:35] +; W64-NEXT: v_wmma_f16_16x16x16_f16 v[32:35], v[16:23], v[24:31], v[32:35] +; W64-NEXT: global_store_b128 v[36:37], v[40:43], off +; W64-NEXT: global_store_b128 v[38:39], v[32:35], off +; W64-NEXT: s_nop 0 +; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W64-NEXT: s_endpgm +bb: + %res.0 = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A.0, <16 x half> %B.0, <8 x half> %C, i1 0) + %res.1 = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A.1, <16 x half> %B.1, <8 x half> %C, i1 0) + store <8 x half> %res.0, ptr addrspace(1) %out.0, align 32 + store <8 x half> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_tied(<16 x half> %A.0, <16 x half> %B.0, <16 x half> %A.1, <16 x half> %B.1, <8 x half> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W64-LABEL: test_wmma_f16_16x16x16_f16_tied: +; W64: ; %bb.0: ; %bb +; W64-NEXT: v_mov_b32_e32 v43, v35 +; W64-NEXT: v_mov_b32_e32 v42, v34 +; W64-NEXT: v_mov_b32_e32 v41, v33 +; W64-NEXT: v_mov_b32_e32 v40, v32 +; W64-NEXT: v_wmma_f16_16x16x16_f16 v[32:35], v[16:23], v[24:31], v[32:35] +; W64-NEXT: s_delay_alu instid0(VALU_DEP_2) +; W64-NEXT: v_wmma_f16_16x16x16_f16 v[40:43], v[0:7], v[8:15], v[40:43] +; W64-NEXT: global_store_b128 v[36:37], v[40:43], off +; W64-NEXT: global_store_b128 v[38:39], v[32:35], off +; W64-NEXT: s_nop 0 +; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W64-NEXT: s_endpgm +bb: + %res.0 = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half> %A.0, <16 x half> %B.0, <8 x half> %C, i1 0) + %res.1 = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half> %A.1, <16 x half> %B.1, <8 x half> %C, i1 0) + store <8 x half> %res.0, ptr addrspace(1) %out.0, align 32 + store <8 x half> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + ; @llvm.amdgcn.wmma.bf16.16x16x16.bf16 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, ptr addrspace(1) %out) { @@ -100,6 +143,47 @@ bb: ret void } +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_untied(<16 x i16> %A.0, <16 x i16> %B.0, <16 x i16> %A.1, <16 x i16> %B.1, <8 x i16> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W64-LABEL: test_wmma_bf16_16x16x16_bf16_untied: +; W64: ; %bb.0: ; %bb +; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[40:43], v[0:7], v[8:15], v[32:35] +; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[32:35], v[16:23], v[24:31], v[32:35] +; W64-NEXT: global_store_b128 v[36:37], v[40:43], off +; W64-NEXT: global_store_b128 v[38:39], v[32:35], off +; W64-NEXT: s_nop 0 +; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W64-NEXT: s_endpgm +bb: + %res.0 = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A.0, <16 x i16> %B.0, <8 x i16> %C, i1 0) + %res.1 = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A.1, <16 x i16> %B.1, <8 x i16> %C, i1 0) + store <8 x i16> %res.0, ptr addrspace(1) %out.0, align 32 + store <8 x i16> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_tied(<16 x i16> %A.0, <16 x i16> %B.0, <16 x i16> %A.1, <16 x i16> %B.1, <8 x i16> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W64-LABEL: test_wmma_bf16_16x16x16_bf16_tied: +; W64: ; %bb.0: ; %bb +; W64-NEXT: v_mov_b32_e32 v43, v35 +; W64-NEXT: v_mov_b32_e32 v42, 
v34 +; W64-NEXT: v_mov_b32_e32 v41, v33 +; W64-NEXT: v_mov_b32_e32 v40, v32 +; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[32:35], v[16:23], v[24:31], v[32:35] +; W64-NEXT: s_delay_alu instid0(VALU_DEP_2) +; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[40:43], v[0:7], v[8:15], v[40:43] +; W64-NEXT: global_store_b128 v[36:37], v[40:43], off +; W64-NEXT: global_store_b128 v[38:39], v[32:35], off +; W64-NEXT: s_nop 0 +; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W64-NEXT: s_endpgm +bb: + %res.0 = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16> %A.0, <16 x i16> %B.0, <8 x i16> %C, i1 0) + %res.1 = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16> %A.1, <16 x i16> %B.1, <8 x i16> %C, i1 0) + store <8 x i16> %res.0, ptr addrspace(1) %out.0, align 32 + store <8 x i16> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + ; @llvm.amdgcn.wmma.i32.16x16x16.iu8 define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_32.ll index 464c374f638c27..5076fda60a4749 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_32.ll @@ -4,7 +4,9 @@ declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half>, <16 x half> , <8 x float>) declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<16 x i16>, <16 x i16> , <8 x float>) declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half>, <16 x half> , <16 x half>, i1 immarg) +declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half>, <16 x half> , <16 x half>, i1 immarg) declare <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16>, <16 x i16> , <16 x i16>, i1 immarg) +declare <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16>, <16 x i16> , <16 x i16>, i1 immarg) declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 immarg, <4 x i32>, i1 immarg, <4 x i32> , <8 x i32>, i1 immarg) declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, <2 x i32>, i1 immarg, <2 x i32> , <8 x i32>, i1 immarg) @@ -78,6 +80,55 @@ bb: ret void } +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_untied(<16 x half> %A.0, <16 x half> %B.0, <16 x half> %A.1, <16 x half> %B.1, <16 x half> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W32-LABEL: test_wmma_f16_16x16x16_f16_untied: +; W32: ; %bb.0: ; %bb +; W32-NEXT: v_wmma_f16_16x16x16_f16 v[44:51], v[0:7], v[8:15], v[32:39] +; W32-NEXT: v_wmma_f16_16x16x16_f16 v[32:39], v[16:23], v[24:31], v[32:39] +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[40:41], v[48:51], off offset:16 +; W32-NEXT: global_store_b128 v[40:41], v[44:47], off +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[42:43], v[36:39], off offset:16 +; W32-NEXT: global_store_b128 v[42:43], v[32:35], off +; W32-NEXT: s_nop 0 +; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W32-NEXT: s_endpgm +bb: + %res.0 = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A.0, <16 x half> %B.0, <16 x half> %C, i1 0) + %res.1 = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A.1, <16 x half> %B.1, <16 x half> %C, i1 0) + store <16 x half> %res.0, ptr addrspace(1) %out.0, align 32 + store <16 x half> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_tied(<16 x half> %A.0, <16 x half> %B.0, <16 x half> %A.1, <16 x half> %B.1, <16 x half> %C, ptr 
addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W32-LABEL: test_wmma_f16_16x16x16_f16_tied: +; W32: ; %bb.0: ; %bb +; W32-NEXT: v_dual_mov_b32 v51, v39 :: v_dual_mov_b32 v50, v38 +; W32-NEXT: v_dual_mov_b32 v49, v37 :: v_dual_mov_b32 v48, v36 +; W32-NEXT: v_dual_mov_b32 v47, v35 :: v_dual_mov_b32 v46, v34 +; W32-NEXT: v_dual_mov_b32 v45, v33 :: v_dual_mov_b32 v44, v32 +; W32-NEXT: v_wmma_f16_16x16x16_f16 v[32:39], v[16:23], v[24:31], v[32:39] +; W32-NEXT: s_delay_alu instid0(VALU_DEP_2) +; W32-NEXT: v_wmma_f16_16x16x16_f16 v[44:51], v[0:7], v[8:15], v[44:51] +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[40:41], v[48:51], off offset:16 +; W32-NEXT: global_store_b128 v[40:41], v[44:47], off +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[42:43], v[36:39], off offset:16 +; W32-NEXT: global_store_b128 v[42:43], v[32:35], off +; W32-NEXT: s_nop 0 +; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W32-NEXT: s_endpgm +bb: + %res.0 = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half> %A.0, <16 x half> %B.0, <16 x half> %C, i1 0) + %res.1 = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half> %A.1, <16 x half> %B.1, <16 x half> %C, i1 0) + store <16 x half> %res.0, ptr addrspace(1) %out.0, align 32 + store <16 x half> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + ; @llvm.amdgcn.wmma.bf16.16x16x16.bf16 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, ptr addrspace(1) %out) { @@ -112,6 +163,55 @@ bb: ret void } +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_untied(<16 x i16> %A.0, <16 x i16> %B.0, <16 x i16> %A.1, <16 x i16> %B.1, <16 x i16> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W32-LABEL: test_wmma_bf16_16x16x16_bf16_untied: +; W32: ; %bb.0: ; %bb +; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[44:51], v[0:7], v[8:15], v[32:39] +; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[32:39], v[16:23], v[24:31], v[32:39] +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[40:41], v[48:51], off offset:16 +; W32-NEXT: global_store_b128 v[40:41], v[44:47], off +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[42:43], v[36:39], off offset:16 +; W32-NEXT: global_store_b128 v[42:43], v[32:35], off +; W32-NEXT: s_nop 0 +; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W32-NEXT: s_endpgm +bb: + %res.0 = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A.0, <16 x i16> %B.0, <16 x i16> %C, i1 0) + %res.1 = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A.1, <16 x i16> %B.1, <16 x i16> %C, i1 0) + store <16 x i16> %res.0, ptr addrspace(1) %out.0, align 32 + store <16 x i16> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_tied(<16 x i16> %A.0, <16 x i16> %B.0, <16 x i16> %A.1, <16 x i16> %B.1, <16 x i16> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W32-LABEL: test_wmma_bf16_16x16x16_bf16_tied: +; W32: ; %bb.0: ; %bb +; W32-NEXT: v_dual_mov_b32 v51, v39 :: v_dual_mov_b32 v50, v38 +; W32-NEXT: v_dual_mov_b32 v49, v37 :: v_dual_mov_b32 v48, v36 +; W32-NEXT: v_dual_mov_b32 v47, v35 :: v_dual_mov_b32 v46, v34 +; W32-NEXT: v_dual_mov_b32 v45, v33 :: v_dual_mov_b32 v44, v32 +; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[32:39], v[16:23], v[24:31], v[32:39] +; W32-NEXT: s_delay_alu instid0(VALU_DEP_2) +; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[44:51], v[0:7], v[8:15], v[44:51] +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[40:41], v[48:51], off offset:16 +; 
W32-NEXT: global_store_b128 v[40:41], v[44:47], off +; W32-NEXT: s_clause 0x1 +; W32-NEXT: global_store_b128 v[42:43], v[36:39], off offset:16 +; W32-NEXT: global_store_b128 v[42:43], v[32:35], off +; W32-NEXT: s_nop 0 +; W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W32-NEXT: s_endpgm +bb: + %res.0 = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16> %A.0, <16 x i16> %B.0, <16 x i16> %C, i1 0) + %res.1 = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16> %A.1, <16 x i16> %B.1, <16 x i16> %C, i1 0) + store <16 x i16> %res.0, ptr addrspace(1) %out.0, align 32 + store <16 x i16> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + ; @llvm.amdgcn.wmma.i32.16x16x16.iu8 define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_64.ll index 7b1e29c18c723f..e47a91b4fc0bac 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_64.ll @@ -4,7 +4,9 @@ declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half>, <16 x half>, <4 x float>) declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<16 x i16>, <16 x i16>, <4 x float>) declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half>, <16 x half>, <8 x half>, i1 immarg) +declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half>, <16 x half>, <8 x half>, i1 immarg) declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16>, <16 x i16>, <8 x i16>, i1 immarg) +declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16>, <16 x i16>, <8 x i16>, i1 immarg) declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 immarg, <4 x i32>, i1 immarg, <4 x i32>, <4 x i32>, i1 immarg) declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <4 x i32>, i1 immarg) @@ -70,6 +72,47 @@ bb: ret void } +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_untied(<16 x half> %A.0, <16 x half> %B.0, <16 x half> %A.1, <16 x half> %B.1, <8 x half> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W64-LABEL: test_wmma_f16_16x16x16_f16_untied: +; W64: ; %bb.0: ; %bb +; W64-NEXT: v_wmma_f16_16x16x16_f16 v[40:43], v[0:7], v[8:15], v[32:35] +; W64-NEXT: v_wmma_f16_16x16x16_f16 v[32:35], v[16:23], v[24:31], v[32:35] +; W64-NEXT: global_store_b128 v[36:37], v[40:43], off +; W64-NEXT: global_store_b128 v[38:39], v[32:35], off +; W64-NEXT: s_nop 0 +; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W64-NEXT: s_endpgm +bb: + %res.0 = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A.0, <16 x half> %B.0, <8 x half> %C, i1 0) + %res.1 = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A.1, <16 x half> %B.1, <8 x half> %C, i1 0) + store <8 x half> %res.0, ptr addrspace(1) %out.0, align 32 + store <8 x half> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_tied(<16 x half> %A.0, <16 x half> %B.0, <16 x half> %A.1, <16 x half> %B.1, <8 x half> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W64-LABEL: test_wmma_f16_16x16x16_f16_tied: +; W64: ; %bb.0: ; %bb +; W64-NEXT: v_mov_b32_e32 v43, v35 +; W64-NEXT: v_mov_b32_e32 v42, v34 +; W64-NEXT: v_mov_b32_e32 v41, v33 +; W64-NEXT: v_mov_b32_e32 v40, v32 +; W64-NEXT: v_wmma_f16_16x16x16_f16 v[32:35], v[16:23], v[24:31], v[32:35] +; W64-NEXT: s_delay_alu instid0(VALU_DEP_2) +; 
W64-NEXT: v_wmma_f16_16x16x16_f16 v[40:43], v[0:7], v[8:15], v[40:43] +; W64-NEXT: global_store_b128 v[36:37], v[40:43], off +; W64-NEXT: global_store_b128 v[38:39], v[32:35], off +; W64-NEXT: s_nop 0 +; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W64-NEXT: s_endpgm +bb: + %res.0 = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half> %A.0, <16 x half> %B.0, <8 x half> %C, i1 0) + %res.1 = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half> %A.1, <16 x half> %B.1, <8 x half> %C, i1 0) + store <8 x half> %res.0, ptr addrspace(1) %out.0, align 32 + store <8 x half> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + ; @llvm.amdgcn.wmma.bf16.16x16x16.bf16 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, ptr addrspace(1) %out) { @@ -100,6 +143,47 @@ bb: ret void } +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_untied(<16 x i16> %A.0, <16 x i16> %B.0, <16 x i16> %A.1, <16 x i16> %B.1, <8 x i16> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W64-LABEL: test_wmma_bf16_16x16x16_bf16_untied: +; W64: ; %bb.0: ; %bb +; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[40:43], v[0:7], v[8:15], v[32:35] +; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[32:35], v[16:23], v[24:31], v[32:35] +; W64-NEXT: global_store_b128 v[36:37], v[40:43], off +; W64-NEXT: global_store_b128 v[38:39], v[32:35], off +; W64-NEXT: s_nop 0 +; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W64-NEXT: s_endpgm +bb: + %res.0 = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A.0, <16 x i16> %B.0, <8 x i16> %C, i1 0) + %res.1 = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A.1, <16 x i16> %B.1, <8 x i16> %C, i1 0) + store <8 x i16> %res.0, ptr addrspace(1) %out.0, align 32 + store <8 x i16> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_tied(<16 x i16> %A.0, <16 x i16> %B.0, <16 x i16> %A.1, <16 x i16> %B.1, <8 x i16> %C, ptr addrspace(1) %out.0, ptr addrspace(1) %out.1) { +; W64-LABEL: test_wmma_bf16_16x16x16_bf16_tied: +; W64: ; %bb.0: ; %bb +; W64-NEXT: v_mov_b32_e32 v43, v35 +; W64-NEXT: v_mov_b32_e32 v42, v34 +; W64-NEXT: v_mov_b32_e32 v41, v33 +; W64-NEXT: v_mov_b32_e32 v40, v32 +; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[32:35], v[16:23], v[24:31], v[32:35] +; W64-NEXT: s_delay_alu instid0(VALU_DEP_2) +; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[40:43], v[0:7], v[8:15], v[40:43] +; W64-NEXT: global_store_b128 v[36:37], v[40:43], off +; W64-NEXT: global_store_b128 v[38:39], v[32:35], off +; W64-NEXT: s_nop 0 +; W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; W64-NEXT: s_endpgm +bb: + %res.0 = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16> %A.0, <16 x i16> %B.0, <8 x i16> %C, i1 0) + %res.1 = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16> %A.1, <16 x i16> %B.1, <8 x i16> %C, i1 0) + store <8 x i16> %res.0, ptr addrspace(1) %out.0, align 32 + store <8 x i16> %res.1, ptr addrspace(1) %out.1, align 32 + ret void +} + ; @llvm.amdgcn.wmma.i32.16x16x16.iu8 define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { From 56dab2cb0733f10df4e9cff8c83dd7081154527b Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Mon, 30 Oct 2023 16:27:47 +0100 Subject: [PATCH 030/144] [clang][Interp] Fix truncateCast() (#69911) The added test case used to fail because we converted the LHS to `-1`. 
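For illustration, a minimal standalone reduction of the failure mode
using llvm::APInt directly (an assumed sketch, not part of the patch):
for the unsigned 2-bit value 3, sign-interpreting the raw bit pattern
yields -1, while zero-extending to the destination width first, as the
fixed truncateCast() now does for unsigned inputs, yields 3.

  #include "llvm/ADT/APInt.h"
  #include <cstdio>

  int main() {
    llvm::APInt V(/*numBits=*/2, /*val=*/3);   // models unsigned _BitInt(2) holding 3
    long long Old = V.getSExtValue();          // sign-extends 0b11 -> -1
    long long New = V.zext(64).getZExtValue(); // zero-extends first -> 3
    std::printf("old=%lld new=%lld\n", Old, New);
  }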
--- clang/lib/AST/Interp/IntegralAP.h | 27 +++++++++++++++------------
 clang/test/AST/Interp/intap.cpp | 3 +++
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/clang/lib/AST/Interp/IntegralAP.h b/clang/lib/AST/Interp/IntegralAP.h
index cfed9ca29336d2..9aefea6d0c47ed 100644
--- a/clang/lib/AST/Interp/IntegralAP.h
+++ b/clang/lib/AST/Interp/IntegralAP.h
@@ -35,10 +35,18 @@ template <bool Signed> class IntegralAP final {
 friend IntegralAP<!Signed>;
 APInt V;

- template <typename T> static T truncateCast(const APInt &V) {
+ template <typename T, bool InputSigned>
+ static T truncateCast(const APInt &V) {
 constexpr unsigned BitSize = sizeof(T) * 8;
- if (BitSize >= V.getBitWidth())
- return std::is_signed_v<T> ? V.getSExtValue() : V.getZExtValue();
+ if (BitSize >= V.getBitWidth()) {
+ APInt Extended;
+ if constexpr (InputSigned)
+ Extended = V.sext(BitSize);
+ else
+ Extended = V.zext(BitSize);
+ return std::is_signed_v<T> ? Extended.getSExtValue()
+ : Extended.getZExtValue();
+ }

 return std::is_signed_v<T> ? V.trunc(BitSize).getSExtValue()
 : V.trunc(BitSize).getZExtValue();
@@ -80,15 +88,10 @@ template <bool Signed> class IntegralAP final {
 return V.ult(RHS.V);
 }

- explicit operator bool() const { return !V.isZero(); }
- explicit operator int8_t() const { return truncateCast<int8_t>(V); }
- explicit operator uint8_t() const { return truncateCast<uint8_t>(V); }
- explicit operator int16_t() const { return truncateCast<int16_t>(V); }
- explicit operator uint16_t() const { return truncateCast<uint16_t>(V); }
- explicit operator int32_t() const { return truncateCast<int32_t>(V); }
- explicit operator uint32_t() const { return truncateCast<uint32_t>(V); }
- explicit operator int64_t() const { return truncateCast<int64_t>(V); }
- explicit operator uint64_t() const { return truncateCast<uint64_t>(V); }
+ template <typename Ty, typename = std::enable_if_t<std::is_integral_v<Ty>>>
+ explicit operator Ty() const {
+ return truncateCast<Ty, Signed>(V);
+ }

 template <typename T> static IntegralAP from(T Value, unsigned NumBits = 0) {
 assert(NumBits > 0);
diff --git a/clang/test/AST/Interp/intap.cpp b/clang/test/AST/Interp/intap.cpp
index 27fae1b904351c..02a860eb0986c1 100644
--- a/clang/test/AST/Interp/intap.cpp
+++ b/clang/test/AST/Interp/intap.cpp
@@ -27,6 +27,9 @@ static_assert(BitIntZero2 == 0, "");
 constexpr unsigned _BitInt(1) UBitIntZero1{};
 static_assert(UBitIntZero1 == 0, "");

+constexpr unsigned _BitInt(2) BI1 = 3u;
+static_assert(BI1 == 3, "");
+
 #ifdef __SIZEOF_INT128__

 namespace i128 {

From f89b85996a1bd44ca5ef1f9d0df64f15f248148d Mon Sep 17 00:00:00 2001
From: Natalie Chouinard <1953083+sudonatalie@users.noreply.github.com>
Date: Mon, 30 Oct 2023 11:36:38 -0400
Subject: [PATCH 031/144] [HLSL][SPIR-V] Fix clang driver lang target test (#70330)

This test has been failing since the SPIR-V backend started failing
explicitly on unsupported shader types. Switched this test to a compute
shader since it is currently the only type supported.
--- clang/test/Driver/hlsl-lang-targets-spirv.hlsl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/test/Driver/hlsl-lang-targets-spirv.hlsl b/clang/test/Driver/hlsl-lang-targets-spirv.hlsl
index ff29f143ba1dc8..e04d71263770bb 100644
--- a/clang/test/Driver/hlsl-lang-targets-spirv.hlsl
+++ b/clang/test/Driver/hlsl-lang-targets-spirv.hlsl
@@ -2,8 +2,8 @@
 // Supported targets
 //
-// RUN: %clang -target dxil-unknown-shadermodel6.2-pixel %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-VALID %s
-// RUN: %clang -target spirv-unknown-shadermodel6.2-library %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-VALID %s
+// RUN: %clang -target dxil-unknown-shadermodel6.2-compute %s -S -o /dev/null 2>&1 | FileCheck --allow-empty --check-prefix=CHECK-VALID %s
+// RUN: %clang -target spirv-unknown-shadermodel6.2-compute %s -S -o /dev/null 2>&1 | FileCheck --allow-empty --check-prefix=CHECK-VALID %s

 // Empty shader model
 //
@@ -27,5 +27,5 @@
 // CHECK-NO-ENV: error: shader stage is required in target '{{.*}}' for HLSL code generation
 // CHECK-BAD-ENV: error: shader stage '{{.*}}' in target '{{.*}}' is invalid for HLSL code generation

-[shader("pixel")]
+[shader("compute"), numthreads(1,1,1)]
 void main() {}

From 8bc4462bc1fb14d7fb0cd80560e88a2acdd46093 Mon Sep 17 00:00:00 2001
From: tsitdikov <149382295+tsitdikov@users.noreply.github.com>
Date: Mon, 30 Oct 2023 15:37:30 +0000
Subject: [PATCH 032/144] Remove unused variable. (#70670)

All usages of the variable were removed in
https://github.com/llvm/llvm-project/pull/68689; we now need to clean
it up.

---
 .../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 67875f668d4d3e..6a515c2ba4e87e 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1852,7 +1852,6 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
 SmallVector<Value> mapOperands;
 SmallVector<Value> useDevPtrOperands;
 SmallVector<Value> useDevAddrOperands;
- ArrayAttr mapTypes;
 llvm::omp::RuntimeFunction RTLFn;
 DataLayout DL = DataLayout(op->getParentOfType<ModuleOp>());

From bb9dced2d3b479fc47221a25eae496f15c573c3c Mon Sep 17 00:00:00 2001
From: David Spickett
Date: Mon, 30 Oct 2023 15:45:40 +0000
Subject: [PATCH 033/144] [lldb][AArch64][Linux] Rename IsEnabled to IsPresent
 (#70303)

For most register sets, "enabled" meant the set was present in the
process and could be used; there was no present-but-turned-off state,
so "enabled" made sense.

Then ZA came along (and soon ZT0), where ZA can be present in the
hardware when you have SME, but ZA itself can be made inactive. This
means "IsZAEnabled()" does not tell you whether ZA is active, only
whether you have SME at all, which is very confusing when we actually
want to know if ZA is active.

So instead say "IsZAPresent", to make these checks more specific. For
things that cannot be made inactive, "present" implies "active", as
they are never inactive.
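A short sketch of the resulting convention (hedged: the zaIsActive
parameter and both helper functions are hypothetical; only
IsZAPresent() comes from this patch):

  #include "RegisterInfoPOSIX_arm64.h" // assumed include path

  // "Present": the target has the register set at all (here, SME's ZA).
  bool HasZARegisterSet(const RegisterInfoPOSIX_arm64 &info) {
    return info.IsZAPresent();
  }

  // "Active" is a separate question: ZA storage can be switched off
  // even while the register set itself is present.
  bool CanReadZAContents(const RegisterInfoPOSIX_arm64 &info,
                         bool zaIsActive) {
    return info.IsZAPresent() && zaIsActive;
  }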
--- .../NativeRegisterContextLinux_arm64.cpp | 21 +++++++++---------- .../Process/Utility/RegisterInfoPOSIX_arm64.h | 12 +++++------ .../RegisterContextPOSIXCore_arm64.cpp | 12 +++++------ 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp index b5210c36814420..22aa2f3a920945 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp @@ -166,10 +166,10 @@ NativeRegisterContextLinux_arm64::NativeRegisterContextLinux_arm64( m_tls_is_valid = false; // SME adds the tpidr2 register - m_tls_size = GetRegisterInfo().IsSSVEEnabled() ? sizeof(m_tls_regs) + m_tls_size = GetRegisterInfo().IsSSVEPresent() ? sizeof(m_tls_regs) : sizeof(m_tls_regs.tpidr_reg); - if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) + if (GetRegisterInfo().IsSVEPresent() || GetRegisterInfo().IsSSVEPresent()) m_sve_state = SVEState::Unknown; else m_sve_state = SVEState::Disabled; @@ -609,8 +609,7 @@ NativeRegisterContextLinux_arm64::CacheAllRegisters(uint32_t &cached_size) { if (error.Fail()) return error; - // Here this means, does the system have ZA, not whether it is active. - if (GetRegisterInfo().IsZAEnabled()) { + if (GetRegisterInfo().IsZAPresent()) { error = ReadZAHeader(); if (error.Fail()) return error; @@ -628,7 +627,7 @@ NativeRegisterContextLinux_arm64::CacheAllRegisters(uint32_t &cached_size) { } // If SVE is enabled we need not copy FPR separately. - if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) { + if (GetRegisterInfo().IsSVEPresent() || GetRegisterInfo().IsSSVEPresent()) { // Store mode and register data. cached_size += sizeof(RegisterSetType) + sizeof(m_sve_state) + GetSVEBufferSize(); @@ -640,7 +639,7 @@ NativeRegisterContextLinux_arm64::CacheAllRegisters(uint32_t &cached_size) { if (error.Fail()) return error; - if (GetRegisterInfo().IsMTEEnabled()) { + if (GetRegisterInfo().IsMTEPresent()) { cached_size += sizeof(RegisterSetType) + GetMTEControlSize(); error = ReadMTEControl(); if (error.Fail()) @@ -708,7 +707,7 @@ Status NativeRegisterContextLinux_arm64::ReadAllRegisterValues( // constants and the functions vec_set_vector_length, sve_set_common and // za_set in the Linux Kernel. - if ((m_sve_state != SVEState::Streaming) && GetRegisterInfo().IsZAEnabled()) { + if ((m_sve_state != SVEState::Streaming) && GetRegisterInfo().IsZAPresent()) { // Use the header size not the buffer size, as we may be using the buffer // for fake data, which we do not want to write out. 
assert(m_za_header.size <= GetZABufferSize()); @@ -716,7 +715,7 @@ Status NativeRegisterContextLinux_arm64::ReadAllRegisterValues( m_za_header.size); } - if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) { + if (GetRegisterInfo().IsSVEPresent() || GetRegisterInfo().IsSSVEPresent()) { dst = AddRegisterSetType(dst, RegisterSetType::SVE); *(reinterpret_cast(dst)) = m_sve_state; dst += sizeof(m_sve_state); @@ -726,13 +725,13 @@ Status NativeRegisterContextLinux_arm64::ReadAllRegisterValues( GetFPRSize()); } - if ((m_sve_state == SVEState::Streaming) && GetRegisterInfo().IsZAEnabled()) { + if ((m_sve_state == SVEState::Streaming) && GetRegisterInfo().IsZAPresent()) { assert(m_za_header.size <= GetZABufferSize()); dst = AddSavedRegisters(dst, RegisterSetType::SME, GetZABuffer(), m_za_header.size); } - if (GetRegisterInfo().IsMTEEnabled()) { + if (GetRegisterInfo().IsMTEPresent()) { dst = AddSavedRegisters(dst, RegisterSetType::MTE, GetMTEControl(), GetMTEControlSize()); } @@ -1411,7 +1410,7 @@ std::vector NativeRegisterContextLinux_arm64::GetExpeditedRegisters( expedited_reg_nums.push_back(GetRegisterInfo().GetRegNumSVEVG()); // SME, streaming vector length. This is used by the ZA register which is // present even when streaming mode is not enabled. - if (GetRegisterInfo().IsSSVEEnabled()) + if (GetRegisterInfo().IsSSVEPresent()) expedited_reg_nums.push_back(GetRegisterInfo().GetRegNumSMESVG()); return expedited_reg_nums; diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h index debdf4c76abc25..1bb3400e426a91 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h @@ -120,12 +120,12 @@ class RegisterInfoPOSIX_arm64 return false; } - bool IsSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); } - bool IsSSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSSVE); } - bool IsZAEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskZA); } - bool IsPAuthEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); } - bool IsMTEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); } - bool IsTLSEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskTLS); } + bool IsSVEPresent() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); } + bool IsSSVEPresent() const { return m_opt_regsets.AnySet(eRegsetMaskSSVE); } + bool IsZAPresent() const { return m_opt_regsets.AnySet(eRegsetMaskZA); } + bool IsPAuthPresent() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); } + bool IsMTEPresent() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); } + bool IsTLSPresent() const { return m_opt_regsets.AnySet(eRegsetMaskTLS); } bool IsSVEReg(unsigned reg) const; bool IsSVEZReg(unsigned reg) const; diff --git a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp index 99cee83eed1251..db37b7cbb99d7e 100644 --- a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp +++ b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp @@ -75,7 +75,7 @@ RegisterContextCorePOSIX_arm64::RegisterContextCorePOSIX_arm64( m_register_info_up->GetTargetArchitecture().GetTriple(); m_fpr_data = getRegset(notes, target_triple, FPR_Desc); - if (m_register_info_up->IsSSVEEnabled()) { + if (m_register_info_up->IsSSVEPresent()) { m_sve_data = getRegset(notes, target_triple, 
AARCH64_SSVE_Desc); lldb::offset_t flags_offset = 12; uint16_t flags = m_sve_data.GetU32(&flags_offset); @@ -83,19 +83,19 @@ RegisterContextCorePOSIX_arm64::RegisterContextCorePOSIX_arm64( m_sve_state = SVEState::Streaming; } - if (m_sve_state != SVEState::Streaming && m_register_info_up->IsSVEEnabled()) + if (m_sve_state != SVEState::Streaming && m_register_info_up->IsSVEPresent()) m_sve_data = getRegset(notes, target_triple, AARCH64_SVE_Desc); - if (m_register_info_up->IsPAuthEnabled()) + if (m_register_info_up->IsPAuthPresent()) m_pac_data = getRegset(notes, target_triple, AARCH64_PAC_Desc); - if (m_register_info_up->IsTLSEnabled()) + if (m_register_info_up->IsTLSPresent()) m_tls_data = getRegset(notes, target_triple, AARCH64_TLS_Desc); - if (m_register_info_up->IsZAEnabled()) + if (m_register_info_up->IsZAPresent()) m_za_data = getRegset(notes, target_triple, AARCH64_ZA_Desc); - if (m_register_info_up->IsMTEEnabled()) + if (m_register_info_up->IsMTEPresent()) m_mte_data = getRegset(notes, target_triple, AARCH64_MTE_Desc); ConfigureRegisterContext(); From fecd11ba87557997b2765cd88e5c058df4eb50ce Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 30 Oct 2023 15:38:49 +0000 Subject: [PATCH 034/144] [RISCV] Remove old peephole declaration in RISCVISelDAGToDAG.h. NFC It was removed in 72e6c1c70d5e07bbc8cb7cae2ed915108daf93aa --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 675ab4e74c8f64..5c182a8699ec06 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -190,7 +190,6 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { bool doPeepholeMaskedRVV(MachineSDNode *Node); bool doPeepholeMergeVVMFold(); bool doPeepholeNoRegPassThru(); - bool performVMergeToVMv(SDNode *N); bool performCombineVMergeAndVOps(SDNode *N); }; From d9b15b068d19089f72fc0d7dc59ed1d6d77125dc Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 30 Oct 2023 08:48:31 -0700 Subject: [PATCH 035/144] [CGExprConstant] stop calling into ConstExprEmitter for Reference type destinations (#70366) Fixes a bug introduced by commit b54294e2c959 ("[clang][ConstantEmitter] have tryEmitPrivate[ForVarInit] try ConstExprEmitter fast-path first") In the added test case, the QualType is a LValueReferenceType. 
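For reference, the shape of code that reaches this path (mirroring the regression test added below in const-init-cxx11.cpp, so no new names are introduced here) is a constant reference member bound to a string-literal array:

    // Reduced from the added const-init-cxx11.cpp test: the destination type
    // of the initializer for 'd' is a reference to a const char array, so
    // constant emission has to yield the address of the referenced array
    // rather than the array value itself.
    struct PR69979 {
      const char (&d)[9];
    } e {"12345678"};

The reference-typed destination dumps as: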
LValueReferenceType 0x558412998d90 'const char (&)[41]'
`-ParenType 0x558412998d30 'const char[41]' sugar
  `-ConstantArrayType 0x558412998cf0 'const char[41]' 41
    `-QualType 0x55841294c271 'const char' const
      `-BuiltinType 0x55841294c270 'char'

Fixes: #69979
---
 clang/lib/CodeGen/CGExprConstant.cpp       | 7 ++++---
 clang/test/CodeGenCXX/const-init-cxx11.cpp | 6 ++++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 9b67a8b3335a16..3f508032e30d65 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -1775,9 +1775,10 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E,
                                                 QualType destType) {
   assert(!destType->isVoidType() && "can't emit a void constant");
 
-  if (llvm::Constant *C =
-          ConstExprEmitter(*this).Visit(const_cast<Expr *>(E), destType))
-    return C;
+  if (!destType->isReferenceType())
+    if (llvm::Constant *C =
+            ConstExprEmitter(*this).Visit(const_cast<Expr *>(E), destType))
+      return C;
 
   Expr::EvalResult Result;
 
diff --git a/clang/test/CodeGenCXX/const-init-cxx11.cpp b/clang/test/CodeGenCXX/const-init-cxx11.cpp
index d22d78d2b94edb..3a12fe444f137b 100644
--- a/clang/test/CodeGenCXX/const-init-cxx11.cpp
+++ b/clang/test/CodeGenCXX/const-init-cxx11.cpp
@@ -424,6 +424,8 @@ namespace DR2126 {
 // CHECK: @_ZN33ClassTemplateWithStaticDataMember3useE ={{.*}} constant ptr @_ZGRN33ClassTemplateWithStaticDataMember1SIvE1aE_
 // CHECK: @_ZGRN39ClassTemplateWithHiddenStaticDataMember1SIvE1aE_ = linkonce_odr hidden constant i32 5, comdat
 // CHECK: @_ZN39ClassTemplateWithHiddenStaticDataMember3useE ={{.*}} constant ptr @_ZGRN39ClassTemplateWithHiddenStaticDataMember1SIvE1aE_
+// CHECK: @.str.[[STR:[0-9]+]] ={{.*}} constant [9 x i8] c"12345678\00"
+// CHECK-NEXT: @e = global %struct.PR69979 { ptr @.str.[[STR]] }
 // CHECK: @_ZGRZN20InlineStaticConstRef3funEvE1i_ = linkonce_odr constant i32 10, comdat
 // CHECK20: @_ZZN12LocalVarInit4dtorEvE1a = internal constant {{.*}} i32 103
@@ -632,6 +634,10 @@ struct X {
   const char *f() { return &X::p; }
 }
 
+struct PR69979 {
+  const char (&d)[9];
+} e {"12345678"};
+
 // VirtualMembers::TemplateClass::templateMethod() must be defined in this TU,
 // not just declared.
// CHECK: define linkonce_odr void @_ZN14VirtualMembers13TemplateClassIiE14templateMethodEv(ptr {{[^,]*}} %this) From 3746f20b567b08f50d37904b821f808343aa334f Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 30 Oct 2023 15:58:08 +0000 Subject: [PATCH 036/144] [gn build] Port 72e6c1c70d5e --- llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn index 94bada7a3e75b9..9ac58c45a3194b 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn @@ -106,6 +106,7 @@ static_library("LLVMRISCVCodeGen") { "RISCVDeadRegisterDefinitions.cpp", "RISCVExpandAtomicPseudoInsts.cpp", "RISCVExpandPseudoInsts.cpp", + "RISCVFoldMasks.cpp", "RISCVFrameLowering.cpp", "RISCVGatherScatterLowering.cpp", "RISCVISelDAGToDAG.cpp", From dc8c2a7794a65f98184eeddf9c3020c1e0a08580 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 30 Oct 2023 09:08:57 -0700 Subject: [PATCH 037/144] [flang][openacc][NFC] Add test for atomic with array ref (#70261) After #69944 lowering of array ref in atomic operation works properly. Add some lowering test to catch up regression in the future. --- .../Lower/OpenACC/acc-atomic-update-array.f90 | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 flang/test/Lower/OpenACC/acc-atomic-update-array.f90 diff --git a/flang/test/Lower/OpenACC/acc-atomic-update-array.f90 b/flang/test/Lower/OpenACC/acc-atomic-update-array.f90 new file mode 100644 index 00000000000000..b2f69fa05c0995 --- /dev/null +++ b/flang/test/Lower/OpenACC/acc-atomic-update-array.f90 @@ -0,0 +1,103 @@ +! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s --check-prefixes=CHECK,FIR +! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s --check-prefixes=CHECK,HLFIR + +subroutine atomic_update_array1(r, n, x) + implicit none + integer :: n + real :: r(n), x + integer :: i + + !$acc data copy(r) + + !$acc parallel loop + do i = 1, n + !$acc atomic update + r(i) = r(i) + x + !$acc end atomic + end do + + !$acc end data +end subroutine + +! CHECK-LABEL: func.func @_QPatomic_update_array1( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref> {fir.bindc_name = "r"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}, %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "x"}) { +! HLFIR: %[[DECL_ARG2:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFatomic_update_array1Ex"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! HLFIR: %[[DECL_ARG0:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) {uniq_name = "_QFatomic_update_array1Er"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +! FIR: %[[ARRAY_REF:.*]] = fir.coordinate_of %[[ARG0]], %{{.*}} : (!fir.ref>, i64) -> !fir.ref +! HLFIR: %[[ARRAY_REF:.*]] = hlfir.designate %[[DECL_ARG0]]#0 (%{{.*}}) : (!fir.box>, i64) -> !fir.ref +! FIR: %[[LOAD_X:.*]] = fir.load %[[ARG2]] : !fir.ref +! HLFIR: %[[LOAD_X:.*]] = fir.load %[[DECL_ARG2]]#0 : !fir.ref +! CHECK: acc.atomic.update %[[ARRAY_REF]] : !fir.ref { +! CHECK: ^bb0(%[[ARG:.*]]: f32): +! CHECK: %[[ATOMIC:.*]] = arith.addf %[[ARG]], %[[LOAD_X]] fastmath : f32 +! CHECK: acc.yield %[[ATOMIC]] : f32 +! CHECK: } + + +subroutine atomic_read_array1(r, n, x) + implicit none + integer :: n + real :: r(n), x + + !$acc atomic read + x = r(n) +end subroutine + +! 
CHECK-LABEL: func.func @_QPatomic_read_array1( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref> {fir.bindc_name = "r"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}, %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "x"}) { +! FIR: %[[ARRAY_REF:.*]] = fir.coordinate_of %[[ARG0]], %{{.*}} : (!fir.ref>, i64) -> !fir.ref +! FIR: acc.atomic.read %[[ARG2]] = %[[ARRAY_REF]] : !fir.ref, f32 +! HLFIR: %[[DECL_X:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFatomic_read_array1Ex"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! HLFIR: %[[DECL_R:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) {uniq_name = "_QFatomic_read_array1Er"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +! HLFIR: %[[DES:.*]] = hlfir.designate %[[DECL_R]]#0 (%{{.*}}) : (!fir.box>, i64) -> !fir.ref +! HLFIR: acc.atomic.read %[[DECL_X]]#1 = %[[DES]] : !fir.ref, f32 + +subroutine atomic_write_array1(r, n, x) + implicit none + integer :: n + real :: r(n), x + + !$acc atomic write + x = r(n) +end subroutine + +! CHECK-LABEL: func.func @_QPatomic_write_array1( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref> {fir.bindc_name = "r"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}, %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "x"}) { +! FIR: %[[ARRAY_REF:.*]] = fir.coordinate_of %[[ARG0]], %{{.*}} : (!fir.ref>, i64) -> !fir.ref +! FIR: %[[LOAD:.*]] = fir.load %[[ARRAY_REF]] : !fir.ref +! FIR: acc.atomic.write %[[ARG2]] = %[[LOAD]] : !fir.ref, f32 +! HLFIR: %[[DECL_X:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFatomic_write_array1Ex"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! HLFIR: %[[DECL_R:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) {uniq_name = "_QFatomic_write_array1Er"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +! HLFIR: %[[DES:.*]] = hlfir.designate %[[DECL_R]]#0 (%{{.*}}) : (!fir.box>, i64) -> !fir.ref +! HLFIR: %[[LOAD:.*]] = fir.load %[[DES]] : !fir.ref +! HLFIR: acc.atomic.write %[[DECL_X]]#1 = %[[LOAD]] : !fir.ref, f32 + +subroutine atomic_capture_array1(r, n, x, y) + implicit none + integer :: n, i + real :: r(n), x, y + + !$acc atomic capture + r(i) = r(i) + x + y = r(i) + !$acc end atomic +end subroutine + +! CHECK-LABEL: func.func @_QPatomic_capture_array1( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref> {fir.bindc_name = "r"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}, %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "x"}, %[[ARG3:.*]]: !fir.ref {fir.bindc_name = "y"}) { +! HLFIR: %[[DECL_X:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFatomic_capture_array1Ex"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! HLFIR: %[[DECL_Y:.*]]:2 = hlfir.declare %[[ARG3]] {uniq_name = "_QFatomic_capture_array1Ey"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! HLFIR: %[[DECL_R:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) {uniq_name = "_QFatomic_capture_array1Er"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +! HLFIR: %[[R_I:.*]] = hlfir.designate %[[DECL_R]]#0 (%{{.*}}) : (!fir.box>, i64) -> !fir.ref +! FIR: %[[R_I:.*]] = fir.coordinate_of %[[ARG0]], %{{.*}} : (!fir.ref>, i64) -> !fir.ref +! HLFIR: %[[LOAD:.*]] = fir.load %[[DECL_X]]#0 : !fir.ref +! FIR: %[[LOAD:.*]] = fir.load %[[ARG2]] : !fir.ref +! CHECK: acc.atomic.capture { +! CHECK: acc.atomic.update %[[R_I]] : !fir.ref { +! CHECK: ^bb0(%[[ARG:.*]]: f32): +! CHECK: %[[ADD:.*]] = arith.addf %[[ARG]], %[[LOAD]] fastmath : f32 +! CHECK: acc.yield %[[ADD]] : f32 +! CHECK: } +! HLFIR: acc.atomic.read %[[DECL_Y]]#1 = %[[R_I]] : !fir.ref, f32 +! FIR: acc.atomic.read %[[ARG3]] = %[[R_I]] : !fir.ref, f32 +! 
CHECK: } From f95b2f1acf1171abb0d00089fd4c9238753847e3 Mon Sep 17 00:00:00 2001 From: Alan Phipps Date: Thu, 21 Sep 2023 13:07:31 -0500 Subject: [PATCH 038/144] Reland "[InstrProf][compiler-rt] Enable MC/DC Support in LLVM Source-based Code Coverage (1/3)" Part 1 of 3. This includes the LLVM back-end processing and profile reading/writing components. compiler-rt changes are included. Differential Revision: https://reviews.llvm.org/D138846 --- clang/lib/Driver/ToolChains/Darwin.cpp | 4 +- clang/test/Driver/darwin-ld.c | 2 +- compiler-rt/include/profile/InstrProfData.inc | 22 +- compiler-rt/lib/profile/InstrProfiling.c | 4 + compiler-rt/lib/profile/InstrProfiling.h | 25 +- .../lib/profile/InstrProfilingBuffer.c | 42 +- compiler-rt/lib/profile/InstrProfilingFile.c | 50 ++- .../lib/profile/InstrProfilingInternal.h | 8 +- compiler-rt/lib/profile/InstrProfilingMerge.c | 33 +- .../lib/profile/InstrProfilingPlatformAIX.c | 7 +- .../profile/InstrProfilingPlatformDarwin.c | 9 + .../lib/profile/InstrProfilingPlatformLinux.c | 10 + .../lib/profile/InstrProfilingPlatformOther.c | 4 + .../profile/InstrProfilingPlatformWindows.c | 7 + .../lib/profile/InstrProfilingWriter.c | 18 +- .../profile/instrprof-write-buffer-internal.c | 21 +- llvm/docs/LangRef.rst | 138 +++++++ llvm/include/llvm/IR/IntrinsicInst.h | 94 ++++- llvm/include/llvm/IR/Intrinsics.td | 15 + .../ProfileData/Coverage/CoverageMapping.h | 4 +- llvm/include/llvm/ProfileData/InstrProf.h | 21 +- .../llvm/ProfileData/InstrProfData.inc | 22 +- .../llvm/ProfileData/InstrProfReader.h | 9 + .../Instrumentation/InstrProfiling.h | 46 ++- .../SelectionDAG/SelectionDAGBuilder.cpp | 6 + llvm/lib/IR/IntrinsicInst.cpp | 4 +- .../Coverage/CoverageMappingReader.cpp | 4 + llvm/lib/ProfileData/InstrProf.cpp | 23 +- llvm/lib/ProfileData/InstrProfCorrelator.cpp | 4 + llvm/lib/ProfileData/InstrProfReader.cpp | 111 ++++- llvm/lib/ProfileData/InstrProfWriter.cpp | 19 + .../Instrumentation/InstrProfiling.cpp | 379 ++++++++++++++---- .../Instrumentation/InstrProfiling/mcdc.ll | 53 +++ .../Transforms/PGOProfile/comdat_internal.ll | 4 +- .../tools/llvm-profdata/Inputs/basic.profraw | Bin 152 -> 192 bytes .../llvm-profdata/Inputs/c-general.profraw | Bin 1800 -> 2016 bytes .../llvm-profdata/Inputs/compat.profdata.v10 | Bin 0 -> 872 bytes .../llvm-profdata/Inputs/compressed.profraw | Bin 1768 -> 1968 bytes .../llvm-profdata/binary-ids-padding.test | 13 +- llvm/test/tools/llvm-profdata/compat.proftext | 23 ++ .../llvm-profdata/large-binary-id-size.test | 5 +- ...alformed-not-space-for-another-header.test | 9 +- .../malformed-num-counters-zero.test | 10 +- .../malformed-ptr-to-counter-array.test | 9 +- .../test/tools/llvm-profdata/mcdc-bitmap.test | 201 ++++++++++ .../misaligned-binary-ids-size.test | 2 +- .../mismatched-raw-profile-header.test | 3 + .../tools/llvm-profdata/raw-32-bits-be.test | 28 +- .../tools/llvm-profdata/raw-32-bits-le.test | 28 +- .../tools/llvm-profdata/raw-64-bits-be.test | 24 +- .../tools/llvm-profdata/raw-64-bits-le.test | 24 +- .../tools/llvm-profdata/raw-two-profiles.test | 14 +- 52 files changed, 1441 insertions(+), 174 deletions(-) create mode 100644 llvm/test/Instrumentation/InstrProfiling/mcdc.ll create mode 100644 llvm/test/tools/llvm-profdata/Inputs/compat.profdata.v10 create mode 100644 llvm/test/tools/llvm-profdata/mcdc-bitmap.test diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 15b9889157b903..f28e08d81bf29b 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ 
b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1400,7 +1400,7 @@ void Darwin::addProfileRTLibs(const ArgList &Args, addExportedSymbol(CmdArgs, "_reset_fn_list"); } - // Align __llvm_prf_{cnts,data} sections to the maximum expected page + // Align __llvm_prf_{cnts,bits,data} sections to the maximum expected page // alignment. This allows profile counters to be mmap()'d to disk. Note that // it's not enough to just page-align __llvm_prf_cnts: the following section // must also be page-aligned so that its data is not clobbered by mmap(). @@ -1410,7 +1410,7 @@ void Darwin::addProfileRTLibs(const ArgList &Args, // extra alignment also allows the same binary to be used with/without sync // enabled. if (!ForGCOV) { - for (auto IPSK : {llvm::IPSK_cnts, llvm::IPSK_data}) { + for (auto IPSK : {llvm::IPSK_cnts, llvm::IPSK_bitmap, llvm::IPSK_data}) { addSectalignToPage( Args, CmdArgs, "__DATA", llvm::getInstrProfSectionName(IPSK, llvm::Triple::MachO, diff --git a/clang/test/Driver/darwin-ld.c b/clang/test/Driver/darwin-ld.c index b835b1f876beb5..f0ca411430cc78 100644 --- a/clang/test/Driver/darwin-ld.c +++ b/clang/test/Driver/darwin-ld.c @@ -336,7 +336,7 @@ // RUN: FileCheck -check-prefix=PROFILE_SECTALIGN %s < %t.log // RUN: %clang -target arm64-apple-ios12 -fprofile-instr-generate -### %t.o 2> %t.log // RUN: FileCheck -check-prefix=PROFILE_SECTALIGN %s < %t.log -// PROFILE_SECTALIGN: "-sectalign" "__DATA" "__llvm_prf_cnts" "0x4000" "-sectalign" "__DATA" "__llvm_prf_data" "0x4000" +// PROFILE_SECTALIGN: "-sectalign" "__DATA" "__llvm_prf_cnts" "0x4000" "-sectalign" "__DATA" "__llvm_prf_bits" "0x4000" "-sectalign" "__DATA" "__llvm_prf_data" "0x4000" // RUN: %clang -target x86_64-apple-darwin12 -fprofile-instr-generate --coverage -### %t.o 2> %t.log // RUN: FileCheck -check-prefix=NO_PROFILE_EXPORT %s < %t.log diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index 8ba7e186d4fb1a..1cf83011206a84 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -76,6 +76,7 @@ INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \ ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ Inc->getHash()->getZExtValue())) INSTR_PROF_DATA(const IntPtrT, IntPtrTy, CounterPtr, RelativeCounterPtr) +INSTR_PROF_DATA(const IntPtrT, IntPtrTy, BitmapPtr, RelativeBitmapPtr) /* This is used to map function pointers for the indirect call targets to * function name hashes during the conversion from raw to merged profile * data. @@ -87,7 +88,9 @@ INSTR_PROF_DATA(IntPtrT, llvm::Type::getInt8PtrTy(Ctx), Values, \ INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \ ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters)) INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \ - ConstantArray::get(Int16ArrayTy, Int16ArrayVals)) + ConstantArray::get(Int16ArrayTy, Int16ArrayVals)) \ +INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumBitmapBytes, \ + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumBitmapBytes)) #undef INSTR_PROF_DATA /* INSTR_PROF_DATA end. 
*/ @@ -132,9 +135,13 @@ INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) INSTR_PROF_RAW_HEADER(uint64_t, NumCounters, NumCounters) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterCounters, PaddingBytesAfterCounters) +INSTR_PROF_RAW_HEADER(uint64_t, NumBitmapBytes, NumBitmapBytes) +INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterBitmapBytes, PaddingBytesAfterBitmapBytes) INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin - (uintptr_t)DataBegin) +INSTR_PROF_RAW_HEADER(uint64_t, BitmapDelta, + (uintptr_t)BitmapBegin - (uintptr_t)DataBegin) INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) #undef INSTR_PROF_RAW_HEADER @@ -267,6 +274,9 @@ INSTR_PROF_SECT_ENTRY(IPSK_data, \ INSTR_PROF_SECT_ENTRY(IPSK_cnts, \ INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON), \ INSTR_PROF_CNTS_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_bitmap, \ + INSTR_PROF_QUOTE(INSTR_PROF_BITS_COMMON), \ + INSTR_PROF_BITS_COFF, "__DATA,") INSTR_PROF_SECT_ENTRY(IPSK_name, \ INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \ INSTR_PROF_NAME_COFF, "__DATA,") @@ -645,11 +655,11 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129 /* Raw profile format version (start from 1). */ -#define INSTR_PROF_RAW_VERSION 8 +#define INSTR_PROF_RAW_VERSION 9 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 10 +#define INSTR_PROF_INDEX_VERSION 11 /* Coverage mapping format version (start from 0). */ -#define INSTR_PROF_COVMAP_VERSION 5 +#define INSTR_PROF_COVMAP_VERSION 6 /* Profile version is always of type uint64_t. Reserve the upper 32 bits in the * version for other variants of profile. We set the 8th most significant bit @@ -686,6 +696,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_DATA_COMMON __llvm_prf_data #define INSTR_PROF_NAME_COMMON __llvm_prf_names #define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts +#define INSTR_PROF_BITS_COMMON __llvm_prf_bits #define INSTR_PROF_VALS_COMMON __llvm_prf_vals #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds #define INSTR_PROF_COVMAP_COMMON __llvm_covmap @@ -697,6 +708,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_DATA_COFF ".lprfd$M" #define INSTR_PROF_NAME_COFF ".lprfn$M" #define INSTR_PROF_CNTS_COFF ".lprfc$M" +#define INSTR_PROF_BITS_COFF ".lprfb$M" #define INSTR_PROF_VALS_COFF ".lprfv$M" #define INSTR_PROF_VNODES_COFF ".lprfnd$M" #define INSTR_PROF_COVMAP_COFF ".lcovmap$M" @@ -708,6 +720,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_DATA_COFF #define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_NAME_COFF #define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_CNTS_COFF +#define INSTR_PROF_BITS_SECT_NAME INSTR_PROF_BITS_COFF /* Array of pointers. Each pointer points to a list * of value nodes associated with one value site. */ @@ -722,6 +735,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON) #define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON) #define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON) +#define INSTR_PROF_BITS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_BITS_COMMON) /* Array of pointers. 
Each pointer points to a list * of value nodes associated with one value site. */ diff --git a/compiler-rt/lib/profile/InstrProfiling.c b/compiler-rt/lib/profile/InstrProfiling.c index 0dd5ff5ae6331c..da04d8ebdec95b 100644 --- a/compiler-rt/lib/profile/InstrProfiling.c +++ b/compiler-rt/lib/profile/InstrProfiling.c @@ -60,6 +60,10 @@ COMPILER_RT_VISIBILITY void __llvm_profile_reset_counters(void) { (__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE) ? 0xFF : 0; memset(I, ResetValue, E - I); + I = __llvm_profile_begin_bitmap(); + E = __llvm_profile_end_bitmap(); + memset(I, 0x0, E - I); + const __llvm_profile_data *DataBegin = __llvm_profile_begin_data(); const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const __llvm_profile_data *DI; diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h index 4433d7bd48871f..e143149fca8270 100644 --- a/compiler-rt/lib/profile/InstrProfiling.h +++ b/compiler-rt/lib/profile/InstrProfiling.h @@ -88,6 +88,8 @@ const char *__llvm_profile_begin_names(void); const char *__llvm_profile_end_names(void); char *__llvm_profile_begin_counters(void); char *__llvm_profile_end_counters(void); +char *__llvm_profile_begin_bitmap(void); +char *__llvm_profile_end_bitmap(void); ValueProfNode *__llvm_profile_begin_vnodes(); ValueProfNode *__llvm_profile_end_vnodes(); uint32_t *__llvm_profile_begin_orderfile(); @@ -101,11 +103,11 @@ void __llvm_profile_reset_counters(void); /*! * \brief Merge profile data from buffer. * - * Read profile data form buffer \p Profile and merge with in-process profile - * counters. The client is expected to have checked or already knows the profile - * data in the buffer matches the in-process counter structure before calling - * it. Returns 0 (success) if the profile data is valid. Upon reading - * invalid/corrupted profile data, returns 1 (failure). + * Read profile data from buffer \p Profile and merge with in-process profile + * counters and bitmaps. The client is expected to have checked or already + * know the profile data in the buffer matches the in-process counter + * structure before calling it. Returns 0 (success) if the profile data is + * valid. Upon reading invalid/corrupted profile data, returns 1 (failure). */ int __llvm_profile_merge_from_buffer(const char *Profile, uint64_t Size); @@ -113,8 +115,8 @@ int __llvm_profile_merge_from_buffer(const char *Profile, uint64_t Size); * * Returns 0 (success) if the profile data in buffer \p Profile with size * \p Size was generated by the same binary and therefore matches - * structurally the in-process counters. If the profile data in buffer is - * not compatible, the interface returns 1 (failure). + * structurally the in-process counters and bitmaps. If the profile data in + * buffer is not compatible, the interface returns 1 (failure). */ int __llvm_profile_check_compatibility(const char *Profile, uint64_t Size); @@ -276,6 +278,10 @@ uint64_t __llvm_profile_get_num_counters(const char *Begin, const char *End); /*! \brief Get the size of the profile counters section in bytes. */ uint64_t __llvm_profile_get_counters_size(const char *Begin, const char *End); +/*! \brief Get the number of bytes in the profile bitmap section. */ +uint64_t __llvm_profile_get_num_bitmap_bytes(const char *Begin, + const char *End); + /* ! \brief Given the sizes of the data and counter information, return the * number of padding bytes before and after the counters, and after the names, * in the raw profile. 
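As an aside on the layout math this header change extends: when continuous mode is off, each raw-profile section is rounded up to an 8-byte boundary. A minimal sketch of that rounding (illustrative only; paddingToEightBytes is an invented name for what __llvm_profile_get_num_padding_bytes is assumed to compute):

    /* Sketch, not part of the patch: bytes of padding needed so that
     * SizeInBytes + padding is a multiple of 8. In continuous mode the
     * data, counter, and (now) bitmap sections are instead padded out to
     * page boundaries so they can be mmap()'d. */
    #include <stdint.h>

    static uint64_t paddingToEightBytes(uint64_t SizeInBytes) {
      return (8 - (SizeInBytes % 8)) % 8;
    }

With the bitmap section inserted between the counters and the names, applying the same rounding to NumBitmapBytes is what produces the new PaddingBytesAfterBitmapBytes out-parameter threaded through the buffer, file, and writer code below.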
@@ -286,8 +292,9 @@ uint64_t __llvm_profile_get_counters_size(const char *Begin, const char *End); * needed to achieve that. */ void __llvm_profile_get_padding_sizes_for_counters( - uint64_t DataSize, uint64_t CountersSize, uint64_t NamesSize, - uint64_t *PaddingBytesBeforeCounters, uint64_t *PaddingBytesAfterCounters, + uint64_t DataSize, uint64_t CountersSize, uint64_t NumBitmapBytes, + uint64_t NamesSize, uint64_t *PaddingBytesBeforeCounters, + uint64_t *PaddingBytesAfterCounters, uint64_t *PaddingBytesAfterBitmap, uint64_t *PaddingBytesAfterNames); /*! diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c index 61ac5d9c028500..c7217b2dfef8a9 100644 --- a/compiler-rt/lib/profile/InstrProfilingBuffer.c +++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c @@ -43,11 +43,14 @@ uint64_t __llvm_profile_get_size_for_buffer(void) { const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const char *CountersBegin = __llvm_profile_begin_counters(); const char *CountersEnd = __llvm_profile_end_counters(); + const char *BitmapBegin = __llvm_profile_begin_bitmap(); + const char *BitmapEnd = __llvm_profile_end_bitmap(); const char *NamesBegin = __llvm_profile_begin_names(); const char *NamesEnd = __llvm_profile_end_names(); return __llvm_profile_get_size_for_buffer_internal( - DataBegin, DataEnd, CountersBegin, CountersEnd, NamesBegin, NamesEnd); + DataBegin, DataEnd, CountersBegin, CountersEnd, BitmapBegin, BitmapEnd, + NamesBegin, NamesEnd); } COMPILER_RT_VISIBILITY @@ -83,6 +86,12 @@ uint64_t __llvm_profile_get_counters_size(const char *Begin, const char *End) { __llvm_profile_counter_entry_size(); } +COMPILER_RT_VISIBILITY +uint64_t __llvm_profile_get_num_bitmap_bytes(const char *Begin, + const char *End) { + return (End - Begin); +} + /// Calculate the number of padding bytes needed to add to \p Offset in order /// for (\p Offset + Padding) to be page-aligned. 
static uint64_t calculateBytesNeededToPageAlign(uint64_t Offset) { @@ -102,13 +111,16 @@ static int needsCounterPadding(void) { COMPILER_RT_VISIBILITY void __llvm_profile_get_padding_sizes_for_counters( - uint64_t DataSize, uint64_t CountersSize, uint64_t NamesSize, - uint64_t *PaddingBytesBeforeCounters, uint64_t *PaddingBytesAfterCounters, + uint64_t DataSize, uint64_t CountersSize, uint64_t NumBitmapBytes, + uint64_t NamesSize, uint64_t *PaddingBytesBeforeCounters, + uint64_t *PaddingBytesAfterCounters, uint64_t *PaddingBytesAfterBitmapBytes, uint64_t *PaddingBytesAfterNames) { if (!needsCounterPadding()) { *PaddingBytesBeforeCounters = 0; *PaddingBytesAfterCounters = __llvm_profile_get_num_padding_bytes(CountersSize); + *PaddingBytesAfterBitmapBytes = + __llvm_profile_get_num_padding_bytes(NumBitmapBytes); *PaddingBytesAfterNames = __llvm_profile_get_num_padding_bytes(NamesSize); return; } @@ -118,31 +130,37 @@ void __llvm_profile_get_padding_sizes_for_counters( *PaddingBytesBeforeCounters = calculateBytesNeededToPageAlign(sizeof(__llvm_profile_header) + DataSize); *PaddingBytesAfterCounters = calculateBytesNeededToPageAlign(CountersSize); + *PaddingBytesAfterBitmapBytes = + calculateBytesNeededToPageAlign(NumBitmapBytes); *PaddingBytesAfterNames = calculateBytesNeededToPageAlign(NamesSize); } COMPILER_RT_VISIBILITY uint64_t __llvm_profile_get_size_for_buffer_internal( const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd, - const char *CountersBegin, const char *CountersEnd, const char *NamesBegin, - const char *NamesEnd) { + const char *CountersBegin, const char *CountersEnd, const char *BitmapBegin, + const char *BitmapEnd, const char *NamesBegin, const char *NamesEnd) { /* Match logic in __llvm_profile_write_buffer(). */ const uint64_t NamesSize = (NamesEnd - NamesBegin) * sizeof(char); uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd); uint64_t CountersSize = __llvm_profile_get_counters_size(CountersBegin, CountersEnd); + const uint64_t NumBitmapBytes = + __llvm_profile_get_num_bitmap_bytes(BitmapBegin, BitmapEnd); /* Determine how much padding is needed before/after the counters and after * the names. 
*/ uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters, - PaddingBytesAfterNames; + PaddingBytesAfterNames, PaddingBytesAfterBitmapBytes; __llvm_profile_get_padding_sizes_for_counters( - DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters, - &PaddingBytesAfterCounters, &PaddingBytesAfterNames); + DataSize, CountersSize, NumBitmapBytes, NamesSize, + &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters, + &PaddingBytesAfterBitmapBytes, &PaddingBytesAfterNames); return sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) + DataSize + PaddingBytesBeforeCounters + CountersSize + - PaddingBytesAfterCounters + NamesSize + PaddingBytesAfterNames; + PaddingBytesAfterCounters + NumBitmapBytes + + PaddingBytesAfterBitmapBytes + NamesSize + PaddingBytesAfterNames; } COMPILER_RT_VISIBILITY @@ -160,9 +178,11 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer(char *Buffer) { COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal( char *Buffer, const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd, const char *CountersBegin, - const char *CountersEnd, const char *NamesBegin, const char *NamesEnd) { + const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd, + const char *NamesBegin, const char *NamesEnd) { ProfDataWriter BufferWriter; initBufferWriter(&BufferWriter, Buffer); return lprofWriteDataImpl(&BufferWriter, DataBegin, DataEnd, CountersBegin, - CountersEnd, 0, NamesBegin, NamesEnd, 0); + CountersEnd, BitmapBegin, BitmapEnd, 0, NamesBegin, + NamesEnd, 0); } diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index ae7872c63f796c..1685b30b9492a6 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -108,14 +108,18 @@ static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const char *CountersBegin = __llvm_profile_begin_counters(); const char *CountersEnd = __llvm_profile_end_counters(); + const char *BitmapBegin = __llvm_profile_begin_bitmap(); + const char *BitmapEnd = __llvm_profile_end_bitmap(); const char *NamesBegin = __llvm_profile_begin_names(); const char *NamesEnd = __llvm_profile_end_names(); const uint64_t NamesSize = (NamesEnd - NamesBegin) * sizeof(char); uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd); uint64_t CountersSize = __llvm_profile_get_counters_size(CountersBegin, CountersEnd); + uint64_t NumBitmapBytes = + __llvm_profile_get_num_bitmap_bytes(BitmapBegin, BitmapEnd); - /* Check that the counter and data sections in this image are + /* Check that the counter, bitmap, and data sections in this image are * page-aligned. */ unsigned PageSize = getpagesize(); if ((intptr_t)CountersBegin % PageSize != 0) { @@ -123,6 +127,11 @@ static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { CountersBegin, PageSize); return 1; } + if ((intptr_t)BitmapBegin % PageSize != 0) { + PROF_ERR("Bitmap section not page-aligned (start = %p, pagesz = %u).\n", + BitmapBegin, PageSize); + return 1; + } if ((intptr_t)DataBegin % PageSize != 0) { PROF_ERR("Data section not page-aligned (start = %p, pagesz = %u).\n", DataBegin, PageSize); @@ -132,10 +141,11 @@ static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { /* Determine how much padding is needed before/after the counters and * after the names. 
*/ uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters, - PaddingBytesAfterNames; + PaddingBytesAfterNames, PaddingBytesAfterBitmapBytes; __llvm_profile_get_padding_sizes_for_counters( - DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters, - &PaddingBytesAfterCounters, &PaddingBytesAfterNames); + DataSize, CountersSize, NumBitmapBytes, NamesSize, + &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters, + &PaddingBytesAfterBitmapBytes, &PaddingBytesAfterNames); uint64_t PageAlignedCountersLength = CountersSize + PaddingBytesAfterCounters; uint64_t FileOffsetToCounters = CurrentFileOffset + @@ -155,6 +165,31 @@ static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { FileOffsetToCounters); return 1; } + + /* Also mmap MCDC bitmap bytes. If there aren't any bitmap bytes, mmap() + * will fail with EINVAL. */ + if (NumBitmapBytes == 0) + return 0; + + uint64_t PageAlignedBitmapLength = + NumBitmapBytes + PaddingBytesAfterBitmapBytes; + uint64_t FileOffsetToBitmap = + CurrentFileOffset + sizeof(__llvm_profile_header) + DataSize + + PaddingBytesBeforeCounters + CountersSize + PaddingBytesAfterCounters; + void *BitmapMmap = + mmap((void *)BitmapBegin, PageAlignedBitmapLength, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, Fileno, FileOffsetToBitmap); + if (BitmapMmap != BitmapBegin) { + PROF_ERR( + "Continuous counter sync mode is enabled, but mmap() failed (%s).\n" + " - BitmapBegin: %p\n" + " - PageAlignedBitmapLength: %" PRIu64 "\n" + " - Fileno: %d\n" + " - FileOffsetToBitmap: %" PRIu64 "\n", + strerror(errno), BitmapBegin, PageAlignedBitmapLength, Fileno, + FileOffsetToBitmap); + return 1; + } return 0; } #elif defined(__ELF__) || defined(_WIN32) @@ -197,6 +232,8 @@ static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const char *CountersBegin = __llvm_profile_begin_counters(); const char *CountersEnd = __llvm_profile_end_counters(); + const char *BitmapBegin = __llvm_profile_begin_bitmap(); + const char *BitmapEnd = __llvm_profile_end_bitmap(); uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd); /* Get the file size. */ uint64_t FileSize = 0; @@ -218,6 +255,11 @@ static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) { /* Return the memory allocated for counters to OS. */ lprofReleaseMemoryPagesToOS((uintptr_t)CountersBegin, (uintptr_t)CountersEnd); + + /* BIAS MODE not supported yet for Bitmap (MCDC). */ + + /* Return the memory allocated for counters to OS. */ + lprofReleaseMemoryPagesToOS((uintptr_t)BitmapBegin, (uintptr_t)BitmapEnd); return 0; } #else diff --git a/compiler-rt/lib/profile/InstrProfilingInternal.h b/compiler-rt/lib/profile/InstrProfilingInternal.h index 360165e32ab3fe..03ed67fcfa766f 100644 --- a/compiler-rt/lib/profile/InstrProfilingInternal.h +++ b/compiler-rt/lib/profile/InstrProfilingInternal.h @@ -21,8 +21,8 @@ */ uint64_t __llvm_profile_get_size_for_buffer_internal( const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd, - const char *CountersBegin, const char *CountersEnd, const char *NamesBegin, - const char *NamesEnd); + const char *CountersBegin, const char *CountersEnd, const char *BitmapBegin, + const char *BitmapEnd, const char *NamesBegin, const char *NamesEnd); /*! 
* \brief Write instrumentation data to the given buffer, given explicit @@ -36,7 +36,8 @@ uint64_t __llvm_profile_get_size_for_buffer_internal( int __llvm_profile_write_buffer_internal( char *Buffer, const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd, const char *CountersBegin, - const char *CountersEnd, const char *NamesBegin, const char *NamesEnd); + const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd, + const char *NamesBegin, const char *NamesEnd); /*! * The data structure describing the data to be written by the @@ -153,6 +154,7 @@ int lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd, const char *CountersBegin, const char *CountersEnd, + const char *BitmapBegin, const char *BitmapEnd, VPDataReaderType *VPDataReader, const char *NamesBegin, const char *NamesEnd, int SkipNameDataWrite); diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c index 9cf12f251f7262..c5f168bf751771 100644 --- a/compiler-rt/lib/profile/InstrProfilingMerge.c +++ b/compiler-rt/lib/profile/InstrProfilingMerge.c @@ -66,6 +66,9 @@ int __llvm_profile_check_compatibility(const char *ProfileData, Header->NumCounters != __llvm_profile_get_num_counters(__llvm_profile_begin_counters(), __llvm_profile_end_counters()) || + Header->NumBitmapBytes != + __llvm_profile_get_num_bitmap_bytes(__llvm_profile_begin_bitmap(), + __llvm_profile_end_bitmap()) || Header->NamesSize != (uint64_t)(__llvm_profile_end_names() - __llvm_profile_begin_names()) || Header->ValueKindLast != IPVK_Last) @@ -74,7 +77,8 @@ int __llvm_profile_check_compatibility(const char *ProfileData, if (ProfileSize < sizeof(__llvm_profile_header) + Header->BinaryIdsSize + Header->NumData * sizeof(__llvm_profile_data) + Header->NamesSize + - Header->NumCounters * __llvm_profile_counter_entry_size()) + Header->NumCounters * __llvm_profile_counter_entry_size() + + Header->NumBitmapBytes) return 1; for (SrcData = SrcDataStart, @@ -82,7 +86,8 @@ int __llvm_profile_check_compatibility(const char *ProfileData, SrcData < SrcDataEnd; ++SrcData, ++DstData) { if (SrcData->NameRef != DstData->NameRef || SrcData->FuncHash != DstData->FuncHash || - SrcData->NumCounters != DstData->NumCounters) + SrcData->NumCounters != DstData->NumCounters || + SrcData->NumBitmapBytes != DstData->NumBitmapBytes) return 1; } @@ -112,9 +117,11 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData, __llvm_profile_header *Header = (__llvm_profile_header *)ProfileData; char *SrcCountersStart, *DstCounter; const char *SrcCountersEnd, *SrcCounter; + const char *SrcBitmapStart; const char *SrcNameStart; const char *SrcValueProfDataStart, *SrcValueProfData; uintptr_t CountersDelta = Header->CountersDelta; + uintptr_t BitmapDelta = Header->BitmapDelta; SrcDataStart = (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header) + @@ -123,11 +130,12 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData, SrcCountersStart = (char *)SrcDataEnd; SrcCountersEnd = SrcCountersStart + Header->NumCounters * __llvm_profile_counter_entry_size(); - SrcNameStart = SrcCountersEnd; + SrcBitmapStart = SrcCountersEnd; + SrcNameStart = SrcBitmapStart + Header->NumBitmapBytes; SrcValueProfDataStart = SrcNameStart + Header->NamesSize + __llvm_profile_get_num_padding_bytes(Header->NamesSize); - if (SrcNameStart < SrcCountersStart) + if (SrcNameStart < SrcCountersStart || SrcNameStart < SrcBitmapStart) return 1; // Merge counters by iterating 
the entire counter section when debug info @@ -157,6 +165,8 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData, // extend CounterPtr to get the original value. char *DstCounters = (char *)((uintptr_t)DstData + signextIfWin64(DstData->CounterPtr)); + char *DstBitmap = + (char *)((uintptr_t)DstData + signextIfWin64(DstData->BitmapPtr)); unsigned NVK = 0; // SrcData is a serialized representation of the memory image. We need to @@ -186,6 +196,21 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData, } } + const char *SrcBitmap = + SrcBitmapStart + ((uintptr_t)SrcData->BitmapPtr - BitmapDelta); + // BitmapDelta also needs to be decreased as we advance to the next data + // record. + BitmapDelta -= sizeof(*SrcData); + unsigned NB = SrcData->NumBitmapBytes; + // NumBitmapBytes may legitimately be 0. Just keep going. + if (NB != 0) { + if (SrcBitmap < SrcBitmapStart || (SrcBitmap + NB) > SrcNameStart) + return 1; + // Merge Src and Dst Bitmap bytes by simply ORing them together. + for (unsigned I = 0; I < NB; I++) + DstBitmap[I] |= SrcBitmap[I]; + } + /* Now merge value profile data. */ if (!VPMergeHook) continue; diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c b/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c index 63219da18ae3a9..9f46a98d78ac4e 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c @@ -187,6 +187,8 @@ void __llvm_profile_register_names_function(void *NamesStart, // define these zero length variables in each of the above 4 sections. static int dummy_cnts[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_CNTS_SECT_NAME); +static int dummy_bits[0] COMPILER_RT_SECTION( + COMPILER_RT_SEG INSTR_PROF_BITS_SECT_NAME); static int dummy_data[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_DATA_SECT_NAME); static const int dummy_name[0] COMPILER_RT_SECTION( @@ -202,8 +204,9 @@ static int dummy_vnds[0] COMPILER_RT_SECTION( #pragma GCC diagnostic ignored "-Wcast-qual" #endif COMPILER_RT_VISIBILITY -void *__llvm_profile_keep[] = {(void *)&dummy_cnts, (void *)&dummy_data, - (void *)&dummy_name, (void *)&dummy_vnds}; +void *__llvm_profile_keep[] = {(void *)&dummy_cnts, (void *)&dummy_bits, + (void *)&dummy_data, (void *)&dummy_name, + (void *)&dummy_vnds}; #ifdef __GNUC__ #pragma GCC diagnostic pop #endif diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformDarwin.c b/compiler-rt/lib/profile/InstrProfilingPlatformDarwin.c index d9f2a113f5b020..2154d242a8174a 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformDarwin.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformDarwin.c @@ -31,6 +31,11 @@ extern char COMPILER_RT_VISIBILITY extern char CountersEnd __asm("section$end$__DATA$" INSTR_PROF_CNTS_SECT_NAME); COMPILER_RT_VISIBILITY +extern char + BitmapStart __asm("section$start$__DATA$" INSTR_PROF_BITS_SECT_NAME); +COMPILER_RT_VISIBILITY +extern char BitmapEnd __asm("section$end$__DATA$" INSTR_PROF_BITS_SECT_NAME); +COMPILER_RT_VISIBILITY extern uint32_t OrderFileStart __asm("section$start$__DATA$" INSTR_PROF_ORDERFILE_SECT_NAME); @@ -56,6 +61,10 @@ char *__llvm_profile_begin_counters(void) { return &CountersStart; } COMPILER_RT_VISIBILITY char *__llvm_profile_end_counters(void) { return &CountersEnd; } COMPILER_RT_VISIBILITY +char *__llvm_profile_begin_bitmap(void) { return &BitmapStart; } +COMPILER_RT_VISIBILITY +char *__llvm_profile_end_bitmap(void) { return &BitmapEnd; } +COMPILER_RT_VISIBILITY uint32_t *__llvm_profile_begin_orderfile(void) { return 
&OrderFileStart; } COMPILER_RT_VISIBILITY diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c index 2cce0a4b2c48d3..d0c42462e5e319 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c @@ -35,6 +35,8 @@ #define PROF_NAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_NAME_COMMON) #define PROF_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_CNTS_COMMON) #define PROF_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_CNTS_COMMON) +#define PROF_BITS_START INSTR_PROF_SECT_START(INSTR_PROF_BITS_COMMON) +#define PROF_BITS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_BITS_COMMON) #define PROF_ORDERFILE_START INSTR_PROF_SECT_START(INSTR_PROF_ORDERFILE_COMMON) #define PROF_VNODES_START INSTR_PROF_SECT_START(INSTR_PROF_VNODES_COMMON) #define PROF_VNODES_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNODES_COMMON) @@ -48,6 +50,8 @@ extern __llvm_profile_data PROF_DATA_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_CNTS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_CNTS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; +extern char PROF_BITS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; +extern char PROF_BITS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern uint32_t PROF_ORDERFILE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_NAME_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_NAME_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; @@ -74,6 +78,12 @@ COMPILER_RT_VISIBILITY char *__llvm_profile_begin_counters(void) { COMPILER_RT_VISIBILITY char *__llvm_profile_end_counters(void) { return &PROF_CNTS_STOP; } +COMPILER_RT_VISIBILITY char *__llvm_profile_begin_bitmap(void) { + return &PROF_BITS_START; +} +COMPILER_RT_VISIBILITY char *__llvm_profile_end_bitmap(void) { + return &PROF_BITS_STOP; +} COMPILER_RT_VISIBILITY uint32_t *__llvm_profile_begin_orderfile(void) { return &PROF_ORDERFILE_START; } diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformOther.c b/compiler-rt/lib/profile/InstrProfilingPlatformOther.c index c7b6e842c9fac2..5319ca813b43f2 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformOther.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformOther.c @@ -88,6 +88,10 @@ COMPILER_RT_VISIBILITY char *__llvm_profile_begin_counters(void) { return CountersFirst; } COMPILER_RT_VISIBILITY char *__llvm_profile_end_counters(void) { return CountersLast; } +COMPILER_RT_VISIBILITY +char *__llvm_profile_begin_bitmap(void) { return BitmapFirst; } +COMPILER_RT_VISIBILITY +char *__llvm_profile_end_bitmap(void) { return BitmapLast; } /* TODO: correctly set up OrderFileFirst. */ COMPILER_RT_VISIBILITY uint32_t *__llvm_profile_begin_orderfile(void) { return OrderFileFirst; } diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c b/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c index dd576b2f8357db..9dbd702865fd29 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformWindows.c @@ -14,6 +14,7 @@ #if defined(_MSC_VER) /* Merge read-write sections into .data. 
*/ #pragma comment(linker, "/MERGE:.lprfc=.data") +#pragma comment(linker, "/MERGE:.lprfb=.data") #pragma comment(linker, "/MERGE:.lprfd=.data") #pragma comment(linker, "/MERGE:.lprfv=.data") #pragma comment(linker, "/MERGE:.lprfnd=.data") @@ -30,6 +31,8 @@ #pragma section(".lprfd$Z", read, write) #pragma section(".lprfc$A", read, write) #pragma section(".lprfc$Z", read, write) +#pragma section(".lprfb$A", read, write) +#pragma section(".lprfb$Z", read, write) #pragma section(".lorderfile$A", read, write) #pragma section(".lprfnd$A", read, write) #pragma section(".lprfnd$Z", read, write) @@ -43,6 +46,8 @@ const char COMPILER_RT_SECTION(".lprfn$Z") NamesEnd = '\0'; char COMPILER_RT_SECTION(".lprfc$A") CountersStart; char COMPILER_RT_SECTION(".lprfc$Z") CountersEnd; +char COMPILER_RT_SECTION(".lprfb$A") BitmapStart; +char COMPILER_RT_SECTION(".lprfb$Z") BitmapEnd; uint32_t COMPILER_RT_SECTION(".lorderfile$A") OrderFileStart; ValueProfNode COMPILER_RT_SECTION(".lprfnd$A") VNodesStart; @@ -58,6 +63,8 @@ const char *__llvm_profile_end_names(void) { return &NamesEnd; } char *__llvm_profile_begin_counters(void) { return &CountersStart + 1; } char *__llvm_profile_end_counters(void) { return &CountersEnd; } +char *__llvm_profile_begin_bitmap(void) { return &BitmapStart + 1; } +char *__llvm_profile_end_bitmap(void) { return &BitmapEnd; } uint32_t *__llvm_profile_begin_orderfile(void) { return &OrderFileStart; } ValueProfNode *__llvm_profile_begin_vnodes(void) { return &VNodesStart + 1; } diff --git a/compiler-rt/lib/profile/InstrProfilingWriter.c b/compiler-rt/lib/profile/InstrProfilingWriter.c index 1e22398a4c0f64..3b61f3def9f6ef 100644 --- a/compiler-rt/lib/profile/InstrProfilingWriter.c +++ b/compiler-rt/lib/profile/InstrProfilingWriter.c @@ -246,17 +246,20 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer, const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const char *CountersBegin = __llvm_profile_begin_counters(); const char *CountersEnd = __llvm_profile_end_counters(); + const char *BitmapBegin = __llvm_profile_begin_bitmap(); + const char *BitmapEnd = __llvm_profile_end_bitmap(); const char *NamesBegin = __llvm_profile_begin_names(); const char *NamesEnd = __llvm_profile_end_names(); return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin, - CountersEnd, VPDataReader, NamesBegin, NamesEnd, - SkipNameDataWrite); + CountersEnd, BitmapBegin, BitmapEnd, VPDataReader, + NamesBegin, NamesEnd, SkipNameDataWrite); } COMPILER_RT_VISIBILITY int lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd, const char *CountersBegin, const char *CountersEnd, + const char *BitmapBegin, const char *BitmapEnd, VPDataReaderType *VPDataReader, const char *NamesBegin, const char *NamesEnd, int SkipNameDataWrite) { int DebugInfoCorrelate = @@ -271,6 +274,8 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, __llvm_profile_get_counters_size(CountersBegin, CountersEnd); const uint64_t NumCounters = __llvm_profile_get_num_counters(CountersBegin, CountersEnd); + const uint64_t NumBitmapBytes = + __llvm_profile_get_num_bitmap_bytes(BitmapBegin, BitmapEnd); const uint64_t NamesSize = DebugInfoCorrelate ? 0 : NamesEnd - NamesBegin; /* Create the header. */ @@ -279,11 +284,11 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, /* Determine how much padding is needed before/after the counters and after * the names. 
*/ uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters, - PaddingBytesAfterNames; + PaddingBytesAfterNames, PaddingBytesAfterBitmapBytes; __llvm_profile_get_padding_sizes_for_counters( - DataSectionSize, CountersSectionSize, NamesSize, + DataSectionSize, CountersSectionSize, NumBitmapBytes, NamesSize, &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters, - &PaddingBytesAfterNames); + &PaddingBytesAfterBitmapBytes, &PaddingBytesAfterNames); { /* Initialize header structure. */ @@ -295,6 +300,7 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, * CountersDelta to match. */ #ifdef _WIN64 Header.CountersDelta = (uint32_t)Header.CountersDelta; + Header.BitmapDelta = (uint32_t)Header.BitmapDelta; #endif /* The data and names sections are omitted in lightweight mode. */ @@ -319,6 +325,8 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, {NULL, sizeof(uint8_t), PaddingBytesBeforeCounters, 1}, {CountersBegin, sizeof(uint8_t), CountersSectionSize, 0}, {NULL, sizeof(uint8_t), PaddingBytesAfterCounters, 1}, + {BitmapBegin, sizeof(uint8_t), NumBitmapBytes, 0}, + {NULL, sizeof(uint8_t), PaddingBytesAfterBitmapBytes, 1}, {(SkipNameDataWrite || DebugInfoCorrelate) ? NULL : NamesBegin, sizeof(uint8_t), NamesSize, 0}, {NULL, sizeof(uint8_t), PaddingBytesAfterNames, 1}}; diff --git a/compiler-rt/test/profile/instrprof-write-buffer-internal.c b/compiler-rt/test/profile/instrprof-write-buffer-internal.c index 7b96c6d91c33f5..d9670f739ca98c 100644 --- a/compiler-rt/test/profile/instrprof-write-buffer-internal.c +++ b/compiler-rt/test/profile/instrprof-write-buffer-internal.c @@ -25,17 +25,18 @@ const char *__llvm_profile_begin_names(void); const char *__llvm_profile_end_names(void); char *__llvm_profile_begin_counters(void); char *__llvm_profile_end_counters(void); +char *__llvm_profile_begin_bitmap(void); +char *__llvm_profile_end_bitmap(void); uint64_t __llvm_profile_get_size_for_buffer_internal( const void *DataBegin, const void *DataEnd, const char *CountersBegin, - const char *CountersEnd, const char *NamesBegin, const char *NamesEnd); + const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd, + const char *NamesBegin, const char *NamesEnd); -int __llvm_profile_write_buffer_internal(char *Buffer, const void *DataBegin, - const void *DataEnd, - const char *CountersBegin, - const char *CountersEnd, - const char *NamesBegin, - const char *NamesEnd); +int __llvm_profile_write_buffer_internal( + char *Buffer, const void *DataBegin, const void *DataEnd, + const char *CountersBegin, const char *CountersEnd, const char *BitmapBegin, + const char *BitmapEnd, const char *NamesBegin, const char *NamesEnd); void __llvm_profile_set_dumped(void); @@ -43,12 +44,14 @@ int main(int argc, const char *argv[]) { uint64_t bufsize = __llvm_profile_get_size_for_buffer_internal( __llvm_profile_begin_data(), __llvm_profile_end_data(), __llvm_profile_begin_counters(), __llvm_profile_end_counters(), + __llvm_profile_begin_bitmap(), __llvm_profile_end_bitmap(), __llvm_profile_begin_names(), __llvm_profile_end_names()); char *buf = malloc(bufsize); - int ret = __llvm_profile_write_buffer_internal(buf, - __llvm_profile_begin_data(), __llvm_profile_end_data(), + int ret = __llvm_profile_write_buffer_internal( + buf, __llvm_profile_begin_data(), __llvm_profile_end_data(), __llvm_profile_begin_counters(), __llvm_profile_end_counters(), + __llvm_profile_begin_bitmap(), __llvm_profile_end_bitmap(), __llvm_profile_begin_names(), 
__llvm_profile_end_names());

  if (ret != 0) {
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 3631dff50f30d8..803503a0e8cc7a 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -13954,6 +13954,144 @@ pass will generate the appropriate data structures and replace the
 ``llvm.instrprof.value.profile`` intrinsic with the call to the profile
 runtime library with proper arguments.
 
+'``llvm.instrprof.mcdc.parameters``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.instrprof.mcdc.parameters(ptr <name>, i64 <hash>,
+                                                   i32 <bitmap-bytes>)
+
+Overview:
+"""""""""
+
+The '``llvm.instrprof.mcdc.parameters``' intrinsic is used to initiate MC/DC
+code coverage instrumentation for a function.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to a global variable containing the
+name of the entity being instrumented. This should generally be the
+(mangled) function name for a set of counters.
+
+The second argument is a hash value that can be used by the consumer
+of the profile data to detect changes to the instrumented source.
+
+The third argument is the number of bitmap bytes required by the function to
+record the number of test vectors executed for each boolean expression.
+
+Semantics:
+""""""""""
+
+This intrinsic represents basic MC/DC parameters initiating one or more MC/DC
+instrumentation sequences in a function. It will cause the ``-instrprof`` pass
+to generate the appropriate data structures and the code to instrument MC/DC
+test vectors in a format that can be written out by a compiler runtime and
+consumed via the ``llvm-profdata`` tool.
+
+'``llvm.instrprof.mcdc.condbitmap.update``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.instrprof.mcdc.condbitmap.update(ptr <name>, i64 <hash>,
+                                                          i32 <condition-id>,
+                                                          ptr <mcdc-temp-addr>,
+                                                          i1 <condition-result>)
+
+Overview:
+"""""""""
+
+The '``llvm.instrprof.mcdc.condbitmap.update``' intrinsic is used to track
+MC/DC condition evaluation for each condition in a boolean expression.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to a global variable containing the
+name of the entity being instrumented. This should generally be the
+(mangled) function name for a set of counters.
+
+The second argument is a hash value that can be used by the consumer
+of the profile data to detect changes to the instrumented source.
+
+The third argument is an ID of a condition to track. This value is used as a
+bit index into the condition bitmap.
+
+The fourth argument is the address of the condition bitmap.
+
+The fifth argument is the boolean value representing the evaluation of the
+condition (true or false).
+
+Semantics:
+""""""""""
+
+This intrinsic represents the update of a condition bitmap that is local to a
+function and will cause the ``-instrprof`` pass to generate the code to
+instrument the control flow around each condition in a boolean expression. The
+ID of each condition corresponds to a bit index in the condition bitmap which
+is set based on the evaluation of the condition.
+
+'``llvm.instrprof.mcdc.tvbitmap.update``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.instrprof.mcdc.tvbitmap.update(ptr <name>, i64 <hash>,
+                                                        i32 <bitmap-bytes>,
+                                                        i32 <bitmap-index>,
+                                                        ptr <mcdc-temp-addr>)
+
+Overview:
+"""""""""
+
+The '``llvm.instrprof.mcdc.tvbitmap.update``' intrinsic is used to track MC/DC
+test vector execution after each boolean expression has been fully executed.
+The overall value of the condition bitmap, after it has been successively +updated using the '``llvm.instrprof.mcdc.condbitmap.update``' intrinsic with +the true or false evaluation of each condition, uniquely identifies an executed +MC/DC test vector and is used as a bit index into the global test vector +bitmap. + +Arguments: +"""""""""" + +The first argument is a pointer to a global variable containing the +name of the entity being instrumented. This should generally be the +(mangled) function name for a set of counters. + +The second argument is a hash value that can be used by the consumer +of the profile data to detect changes to the instrumented source. + +The third argument is the number of bitmap bytes required by the function to +record the number of test vectors executed for each boolean expression. + +The fourth argument is the byte index into the global test vector bitmap +corresponding to the function. + +The fifth argument is the address of the condition bitmap, which contains a +value representing an executed MC/DC test vector. It is loaded and used as the +bit index of the test vector bitmap. + +Semantics: +"""""""""" + +This intrinsic represents the final operation of an MC/DC instrumentation +sequence and will cause the ``-instrprof`` pass to generate the code to +instrument an update of a function's global test vector bitmap to indicate that +a test vector has been executed. The global test vector bitmap can be consumed +by the ``llvm-profdata`` and ``llvm-cov`` tools. + '``llvm.thread.pointer``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index c9777c72558be3..c26ecef6eaaee1 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -1424,6 +1424,11 @@ class InstrProfInstBase : public IntrinsicInst { ConstantInt *getHash() const { return cast(const_cast(getArgOperand(1))); } +}; + +/// A base class for all instrprof counter intrinsics. +class InstrProfCntrInstBase : public InstrProfInstBase { +public: // The number of counters for the instrumented function. ConstantInt *getNumCounters() const; // The index of the counter that this instruction acts on. @@ -1431,7 +1436,7 @@ class InstrProfInstBase : public IntrinsicInst { }; /// This represents the llvm.instrprof.cover intrinsic. -class InstrProfCoverInst : public InstrProfInstBase { +class InstrProfCoverInst : public InstrProfCntrInstBase { public: static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::instrprof_cover; @@ -1442,7 +1447,7 @@ class InstrProfCoverInst : public InstrProfInstBase { }; /// This represents the llvm.instrprof.increment intrinsic. -class InstrProfIncrementInst : public InstrProfInstBase { +class InstrProfIncrementInst : public InstrProfCntrInstBase { public: static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::instrprof_increment || @@ -1466,7 +1471,7 @@ class InstrProfIncrementInstStep : public InstrProfIncrementInst { }; /// This represents the llvm.instrprof.timestamp intrinsic. -class InstrProfTimestampInst : public InstrProfInstBase { +class InstrProfTimestampInst : public InstrProfCntrInstBase { public: static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::instrprof_timestamp; @@ -1477,7 +1482,7 @@ class InstrProfTimestampInst : public InstrProfInstBase { }; /// This represents the llvm.instrprof.value.profile intrinsic. 
-class InstrProfValueProfileInst : public InstrProfInstBase { +class InstrProfValueProfileInst : public InstrProfCntrInstBase { public: static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::instrprof_value_profile; @@ -1500,6 +1505,87 @@ class InstrProfValueProfileInst : public InstrProfInstBase { } }; +/// A base class for instrprof mcdc intrinsics that require global bitmap bytes. +class InstrProfMCDCBitmapInstBase : public InstrProfInstBase { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_mcdc_parameters || + I->getIntrinsicID() == Intrinsic::instrprof_mcdc_tvbitmap_update; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + + /// \return The number of bytes used for the MCDC bitmaps for the instrumented + /// function. + ConstantInt *getNumBitmapBytes() const { + return cast(const_cast(getArgOperand(2))); + } +}; + +/// This represents the llvm.instrprof.mcdc.parameters intrinsic. +class InstrProfMCDCBitmapParameters : public InstrProfMCDCBitmapInstBase { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_mcdc_parameters; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.instrprof.mcdc.tvbitmap.update intrinsic. +class InstrProfMCDCTVBitmapUpdate : public InstrProfMCDCBitmapInstBase { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_mcdc_tvbitmap_update; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + + /// \return The index of the TestVector Bitmap upon which this intrinsic + /// acts. + ConstantInt *getBitmapIndex() const { + return cast(const_cast(getArgOperand(3))); + } + + /// \return The address of the corresponding condition bitmap containing + /// the index of the TestVector to update within the TestVector Bitmap. + Value *getMCDCCondBitmapAddr() const { + return cast(const_cast(getArgOperand(4))); + } +}; + +/// This represents the llvm.instrprof.mcdc.condbitmap.update intrinsic. +/// It does not pertain to global bitmap updates or parameters and so doesn't +/// inherit from InstrProfMCDCBitmapInstBase. +class InstrProfMCDCCondBitmapUpdate : public InstrProfInstBase { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_mcdc_condbitmap_update; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + + /// \return The ID of the condition to update. + ConstantInt *getCondID() const { + return cast(const_cast(getArgOperand(2))); + } + + /// \return The address of the corresponding condition bitmap. + Value *getMCDCCondBitmapAddr() const { + return cast(const_cast(getArgOperand(3))); + } + + /// \return The boolean value to set in the condition bitmap for the + /// corresponding condition ID. This represents how the condition evaluated. 
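The split above, with InstrProfMCDCBitmapInstBase alongside InstrProfCntrInstBase, follows LLVM's usual classof-based RTTI layering: intrinsics that share accessors share a base, and isa/dyn_cast dispatch on the intrinsic ID. A minimal standalone model of the pattern (hypothetical types, not the real IntrinsicInst hierarchy):

  #include <cassert>

  // Kind plays the role of the intrinsic ID. Counter intrinsics keep the
  // getNumCounters()/getIndex()-style accessors; MC/DC bitmap intrinsics get
  // getNumBitmapBytes() instead, so they need a sibling base class.
  struct MiniIntrinsic {
    enum Kind { Increment, Cover, McdcParameters, McdcTVBitmapUpdate } K;
  };

  struct MiniCntrIntrinsic : MiniIntrinsic {
    static bool classof(const MiniIntrinsic *I) {
      return I->K == Increment || I->K == Cover;
    }
  };

  struct MiniMCDCBitmapIntrinsic : MiniIntrinsic {
    static bool classof(const MiniIntrinsic *I) {
      return I->K == McdcParameters || I->K == McdcTVBitmapUpdate;
    }
  };

  int main() {
    MiniIntrinsic MP{MiniIntrinsic::McdcParameters};
    assert(MiniMCDCBitmapIntrinsic::classof(&MP)); // isa<...> would say yes
    assert(!MiniCntrIntrinsic::classof(&MP));      // and dyn_cast would fail
  }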
+ Value *getCondBool() const { + return cast(const_cast(getArgOperand(4))); + } +}; + class PseudoProbeInst : public IntrinsicInst { public: static bool classof(const IntrinsicInst *I) { diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 80c3d8d403d911..d7a81591d03443 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -909,6 +909,21 @@ def int_instrprof_value_profile : Intrinsic<[], llvm_i64_ty, llvm_i32_ty, llvm_i32_ty]>; +// A parameter configuration for instrumentation based MCDC profiling. +def int_instrprof_mcdc_parameters : Intrinsic<[], + [llvm_ptr_ty, llvm_i64_ty, + llvm_i32_ty]>; + +// A test vector bitmap update for instrumentation based MCDC profiling. +def int_instrprof_mcdc_tvbitmap_update : Intrinsic<[], + [llvm_ptr_ty, llvm_i64_ty, + llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty]>; + +// A condition bitmap value update for instrumentation based MCDC profiling. +def int_instrprof_mcdc_condbitmap_update : Intrinsic<[], + [llvm_ptr_ty, llvm_i64_ty, + llvm_i32_ty, llvm_ptr_ty, llvm_i1_ty]>; + def int_call_preallocated_setup : DefaultAttrsIntrinsic<[llvm_token_ty], [llvm_i32_ty]>; def int_call_preallocated_arg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_i32_ty]>; def int_call_preallocated_teardown : DefaultAttrsIntrinsic<[], [llvm_token_ty]>; diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index 948203eea86365..e9fbc4631dd5fb 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -1027,7 +1027,9 @@ enum CovMapVersion { // Compilation directory is stored separately and combined with relative // filenames to produce an absolute file path. Version6 = 5, - // The current version is Version6. + // Branch regions extended and Decision Regions added for MC/DC. + Version7 = 6, + // The current version is Version7. CurrentVersion = INSTR_PROF_COVMAP_VERSION }; diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index e968f8ffd5075f..e16785efc3a365 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -96,6 +96,9 @@ inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; } /// Return the name prefix of profile counter variables. inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; } +/// Return the name prefix of profile bitmap variables. +inline StringRef getInstrProfBitmapVarPrefix() { return "__profbm_"; } + /// Return the name prefix of value profile variables. inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; } @@ -338,6 +341,7 @@ enum class instrprof_error { invalid_prof, hash_mismatch, count_mismatch, + bitmap_mismatch, counter_overflow, value_site_count_mismatch, compress_failed, @@ -693,18 +697,23 @@ struct InstrProfValueSiteRecord { /// Profiling information for a single function. struct InstrProfRecord { std::vector Counts; + std::vector BitmapBytes; InstrProfRecord() = default; InstrProfRecord(std::vector Counts) : Counts(std::move(Counts)) {} + InstrProfRecord(std::vector Counts, + std::vector BitmapBytes) + : Counts(std::move(Counts)), BitmapBytes(std::move(BitmapBytes)) {} InstrProfRecord(InstrProfRecord &&) = default; InstrProfRecord(const InstrProfRecord &RHS) - : Counts(RHS.Counts), + : Counts(RHS.Counts), BitmapBytes(RHS.BitmapBytes), ValueData(RHS.ValueData ? 
std::make_unique(*RHS.ValueData) : nullptr) {} InstrProfRecord &operator=(InstrProfRecord &&) = default; InstrProfRecord &operator=(const InstrProfRecord &RHS) { Counts = RHS.Counts; + BitmapBytes = RHS.BitmapBytes; if (!RHS.ValueData) { ValueData = nullptr; return *this; @@ -883,6 +892,11 @@ struct NamedInstrProfRecord : InstrProfRecord { NamedInstrProfRecord(StringRef Name, uint64_t Hash, std::vector Counts) : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {} + NamedInstrProfRecord(StringRef Name, uint64_t Hash, + std::vector Counts, + std::vector BitmapBytes) + : InstrProfRecord(std::move(Counts), std::move(BitmapBytes)), Name(Name), + Hash(Hash) {} static bool hasCSFlagInHash(uint64_t FuncHash) { return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1); @@ -1014,7 +1028,9 @@ enum ProfVersion { Version9 = 9, // An additional (optional) temporal profile traces section is added. Version10 = 10, - // The current version is 10. + // An additional field is used for bitmap bytes. + Version11 = 11, + // The current version is 11. CurrentVersion = INSTR_PROF_INDEX_VERSION }; const uint64_t Version = ProfVersion::CurrentVersion; @@ -1152,6 +1168,7 @@ namespace RawInstrProf { // Version 6: Added binary id. // Version 7: Reorder binary id and include version in signature. // Version 8: Use relative counter pointer. +// Version 9: Added relative bitmap bytes pointer and count used by MC/DC. const uint64_t Version = INSTR_PROF_RAW_VERSION; template inline uint64_t getMagic(); diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index 13be2753e514ef..fa17ac370f6964 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -76,6 +76,7 @@ INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \ ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ Inc->getHash()->getZExtValue())) INSTR_PROF_DATA(const IntPtrT, IntPtrTy, CounterPtr, RelativeCounterPtr) +INSTR_PROF_DATA(const IntPtrT, IntPtrTy, BitmapPtr, RelativeBitmapPtr) /* This is used to map function pointers for the indirect call targets to * function name hashes during the conversion from raw to merged profile * data. @@ -87,7 +88,9 @@ INSTR_PROF_DATA(IntPtrT, llvm::Type::getInt8PtrTy(Ctx), Values, \ INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \ ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters)) INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \ - ConstantArray::get(Int16ArrayTy, Int16ArrayVals)) + ConstantArray::get(Int16ArrayTy, Int16ArrayVals)) \ +INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumBitmapBytes, \ + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumBitmapBytes)) #undef INSTR_PROF_DATA /* INSTR_PROF_DATA end. 
*/ @@ -132,9 +135,13 @@ INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) INSTR_PROF_RAW_HEADER(uint64_t, NumCounters, NumCounters) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterCounters, PaddingBytesAfterCounters) +INSTR_PROF_RAW_HEADER(uint64_t, NumBitmapBytes, NumBitmapBytes) +INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterBitmapBytes, PaddingBytesAfterBitmapBytes) INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin - (uintptr_t)DataBegin) +INSTR_PROF_RAW_HEADER(uint64_t, BitmapDelta, + (uintptr_t)BitmapBegin - (uintptr_t)DataBegin) INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) #undef INSTR_PROF_RAW_HEADER @@ -267,6 +274,9 @@ INSTR_PROF_SECT_ENTRY(IPSK_data, \ INSTR_PROF_SECT_ENTRY(IPSK_cnts, \ INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON), \ INSTR_PROF_CNTS_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_bitmap, \ + INSTR_PROF_QUOTE(INSTR_PROF_BITS_COMMON), \ + INSTR_PROF_BITS_COFF, "__DATA,") INSTR_PROF_SECT_ENTRY(IPSK_name, \ INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \ INSTR_PROF_NAME_COFF, "__DATA,") @@ -645,11 +655,11 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129 /* Raw profile format version (start from 1). */ -#define INSTR_PROF_RAW_VERSION 8 +#define INSTR_PROF_RAW_VERSION 9 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 10 +#define INSTR_PROF_INDEX_VERSION 11 /* Coverage mapping format version (start from 0). */ -#define INSTR_PROF_COVMAP_VERSION 5 +#define INSTR_PROF_COVMAP_VERSION 6 /* Profile version is always of type uint64_t. Reserve the upper 32 bits in the * version for other variants of profile. We set the 8th most significant bit @@ -686,6 +696,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_DATA_COMMON __llvm_prf_data #define INSTR_PROF_NAME_COMMON __llvm_prf_names #define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts +#define INSTR_PROF_BITS_COMMON __llvm_prf_bits #define INSTR_PROF_VALS_COMMON __llvm_prf_vals #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds #define INSTR_PROF_COVMAP_COMMON __llvm_covmap @@ -697,6 +708,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_DATA_COFF ".lprfd$M" #define INSTR_PROF_NAME_COFF ".lprfn$M" #define INSTR_PROF_CNTS_COFF ".lprfc$M" +#define INSTR_PROF_BITS_COFF ".lprfb$M" #define INSTR_PROF_VALS_COFF ".lprfv$M" #define INSTR_PROF_VNODES_COFF ".lprfnd$M" #define INSTR_PROF_COVMAP_COFF ".lcovmap$M" @@ -708,6 +720,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_DATA_COFF #define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_NAME_COFF #define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_CNTS_COFF +#define INSTR_PROF_BITS_SECT_NAME INSTR_PROF_BITS_COFF /* Array of pointers. Each pointer points to a list * of value nodes associated with one value site. */ @@ -722,6 +735,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON) #define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON) #define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON) +#define INSTR_PROF_BITS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_BITS_COMMON) /* Array of pointers. 
Each pointer points to a list
 * of value nodes associated with one value site.
 */
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 5f54cbeb1b01ed..cf6429a324d36b 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -323,11 +323,14 @@ class RawInstrProfReader : public InstrProfReader {
   // the variant types of the profile.
   uint64_t Version;
   uint64_t CountersDelta;
+  uint64_t BitmapDelta;
   uint64_t NamesDelta;
   const RawInstrProf::ProfileData<IntPtrT> *Data;
   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
   const char *CountersStart;
   const char *CountersEnd;
+  const char *BitmapStart;
+  const char *BitmapEnd;
   const char *NamesStart;
   const char *NamesEnd;
   // After value profile is all read, this pointer points to
@@ -428,6 +431,7 @@ class RawInstrProfReader : public InstrProfReader {
   Error readName(NamedInstrProfRecord &Record);
   Error readFuncHash(NamedInstrProfRecord &Record);
   Error readRawCounts(InstrProfRecord &Record);
+  Error readRawBitmapBytes(InstrProfRecord &Record);
   Error readValueProfilingData(InstrProfRecord &Record);
   bool atEnd() const { return Data == DataEnd; }
@@ -440,6 +444,7 @@ class RawInstrProfReader : public InstrProfReader {
       // As we advance to the next record, we maintain the correct CountersDelta
       // with respect to the next record.
       CountersDelta -= sizeof(*Data);
+      BitmapDelta -= sizeof(*Data);
     }
     Data++;
     ValueDataStart += CurValueDataSize;
@@ -733,6 +738,10 @@ class IndexedInstrProfReader : public InstrProfReader {
   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
                           std::vector<uint64_t> &Counts);

+  /// Fill Bitmap Bytes with the profile data for the given function name.
+  Error getFunctionBitmapBytes(StringRef FuncName, uint64_t FuncHash,
+                               std::vector<uint8_t> &BitmapBytes);
+
   /// Return the maximum of all known function counts.
   /// \c UseCS indicates whether to use the context-sensitive count.
   uint64_t getMaximumFunctionCount(bool UseCS) {
diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
index cb0c055dcb74ae..d8f3e75087ace6 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
@@ -50,6 +50,7 @@ class InstrProfiling : public PassInfoMixin<InstrProfiling> {
     uint32_t NumValueSites[IPVK_Last + 1];
     GlobalVariable *RegionCounters = nullptr;
     GlobalVariable *DataVar = nullptr;
+    GlobalVariable *RegionBitmaps = nullptr;
     PerFunctionProfileData() {
       memset(NumValueSites, 0, sizeof(uint32_t) * (IPVK_Last + 1));
@@ -105,20 +106,59 @@ class InstrProfiling : public PassInfoMixin<InstrProfiling> {
   /// Force emitting of name vars for unused functions.
   void lowerCoverageData(GlobalVariable *CoverageNamesVar);

+  /// Replace instrprof.mcdc.tvbitmap.update with a shift and or instruction
+  /// using the index represented by a temp value into a bitmap.
+  void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);
+
+  /// Replace instrprof.mcdc.condbitmap.update with a shift and or instruction
+  /// using the corresponding condition ID.
+  void lowerMCDCCondBitmapUpdate(InstrProfMCDCCondBitmapUpdate *Ins);
+
   /// Compute the address of the counter value that this profiling instruction
   /// acts on.
-  Value *getCounterAddress(InstrProfInstBase *I);
+  Value *getCounterAddress(InstrProfCntrInstBase *I);

   /// Get the region counters for an increment, creating them if necessary.
/// /// If the counter array doesn't yet exist, the profile data variables /// referring to them will also be created. - GlobalVariable *getOrCreateRegionCounters(InstrProfInstBase *Inc); + GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc); /// Create the region counters. - GlobalVariable *createRegionCounters(InstrProfInstBase *Inc, StringRef Name, + GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc, + StringRef Name, GlobalValue::LinkageTypes Linkage); + /// Compute the address of the test vector bitmap that this profiling + /// instruction acts on. + Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I); + + /// Get the region bitmaps for an increment, creating them if necessary. + /// + /// If the bitmap array doesn't yet exist, the profile data variables + /// referring to them will also be created. + GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc); + + /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with + /// an MC/DC Decision region. The number of bytes required is indicated by + /// the intrinsic used (type InstrProfMCDCBitmapInstBase). This is called + /// as part of setupProfileSection() and is conceptually very similar to + /// what is done for profile data counters in createRegionCounters(). + GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc, + StringRef Name, + GlobalValue::LinkageTypes Linkage); + + /// Set Comdat property of GV, if required. + void maybeSetComdat(GlobalVariable *GV, Function *Fn, StringRef VarName); + + /// Setup the sections into which counters and bitmaps are allocated. + GlobalVariable *setupProfileSection(InstrProfInstBase *Inc, + InstrProfSectKind IPSK); + + /// Create INSTR_PROF_DATA variable for counters and bitmaps. + void createDataVariable(InstrProfCntrInstBase *Inc, + InstrProfMCDCBitmapParameters *Update); + /// Emit the section with compressed function names. 
void emitNameData(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 71f6a3791c2cee..0e6129aaf52192 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7197,6 +7197,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, llvm_unreachable("instrprof failed to lower a timestamp"); case Intrinsic::instrprof_value_profile: llvm_unreachable("instrprof failed to lower a value profiling call"); + case Intrinsic::instrprof_mcdc_parameters: + llvm_unreachable("instrprof failed to lower mcdc parameters"); + case Intrinsic::instrprof_mcdc_tvbitmap_update: + llvm_unreachable("instrprof failed to lower an mcdc tvbitmap update"); + case Intrinsic::instrprof_mcdc_condbitmap_update: + llvm_unreachable("instrprof failed to lower an mcdc condbitmap update"); case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index e4ddd57575355f..20ae08dd128300 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -270,13 +270,13 @@ int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef NameTable, return -1; } -ConstantInt *InstrProfInstBase::getNumCounters() const { +ConstantInt *InstrProfCntrInstBase::getNumCounters() const { if (InstrProfValueProfileInst::classof(this)) llvm_unreachable("InstrProfValueProfileInst does not have counters!"); return cast(const_cast(getArgOperand(2))); } -ConstantInt *InstrProfInstBase::getIndex() const { +ConstantInt *InstrProfCntrInstBase::getIndex() const { if (InstrProfValueProfileInst::classof(this)) llvm_unreachable("Please use InstrProfValueProfileInst::getIndex()"); return cast(const_cast(getArgOperand(3))); diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp index 857498256ec54c..d6aade6fcd0f8d 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -783,6 +783,7 @@ Expected> CovMapFuncRecordReader::get( case CovMapVersion::Version4: case CovMapVersion::Version5: case CovMapVersion::Version6: + case CovMapVersion::Version7: // Decompress the name data. 
if (Error E = P.create(P.getNameData())) return std::move(E); @@ -801,6 +802,9 @@ Expected> CovMapFuncRecordReader::get( else if (Version == CovMapVersion::Version6) return std::make_unique>(P, R, D, F); + else if (Version == CovMapVersion::Version7) + return std::make_unique>(P, R, D, F); } llvm_unreachable("Unsupported version"); } diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 0cb296b3bde6c5..583415ff451a08 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -136,6 +136,9 @@ static std::string getInstrProfErrString(instrprof_error Err, case instrprof_error::count_mismatch: OS << "function basic block count change detected (counter mismatch)"; break; + case instrprof_error::bitmap_mismatch: + OS << "function bitmap size change detected (bitmap size mismatch)"; + break; case instrprof_error::counter_overflow: OS << "counter overflow"; break; @@ -815,6 +818,18 @@ void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight, Warn(instrprof_error::counter_overflow); } + // If the number of bitmap bytes doesn't match we either have bad data + // or a hash collision. + if (BitmapBytes.size() != Other.BitmapBytes.size()) { + Warn(instrprof_error::bitmap_mismatch); + return; + } + + // Bitmap bytes are merged by simply ORing them together. + for (size_t I = 0, E = Other.BitmapBytes.size(); I < E; ++I) { + BitmapBytes[I] = Other.BitmapBytes[I] | BitmapBytes[I]; + } + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) mergeValueProfData(Kind, Other, Weight, Warn); } @@ -1487,9 +1502,11 @@ Expected
Header::readFromBuffer(const unsigned char *Buffer) {
   // When a new field is added in the header add a case statement here to
   // populate it.
   static_assert(
-      IndexedInstrProf::ProfVersion::CurrentVersion == Version10,
+      IndexedInstrProf::ProfVersion::CurrentVersion == Version11,
       "Please update the reading code below if a new field has been added, "
       "if not add a case statement to fall through to the latest version.");
+  case 11ull:
+    [[fallthrough]];
   case 10ull:
     H.TemporalProfTracesOffset =
         read(Buffer, offsetOf(&Header::TemporalProfTracesOffset));
@@ -1513,10 +1530,12 @@ size_t Header::size() const {
   // When a new field is added to the header add a case statement here to
   // compute the size as offset of the new field + size of the new field. This
   // relies on the field being added to the end of the list.
-  static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version10,
+  static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version11,
                 "Please update the size computation below if a new field has "
                 "been added to the header, if not add a case statement to "
                 "fall through to the latest version.");
+  case 11ull:
+    [[fallthrough]];
   case 10ull:
     return offsetOf(&Header::TemporalProfTracesOffset) +
            sizeof(Header::TemporalProfTracesOffset);
diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
index f298fcab1220cf..2138368500bed0 100644
--- a/llvm/lib/ProfileData/InstrProfCorrelator.cpp
+++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
@@ -210,11 +210,15 @@ void InstrProfCorrelatorImpl<IntPtrT>::addProbe(StringRef FunctionName,
       // In this mode, CounterPtr actually stores the section relative address
       // of the counter.
       maybeSwap<IntPtrT>(CounterOffset),
+      // TODO: MC/DC is not yet supported.
+      /*BitmapOffset=*/maybeSwap<IntPtrT>(0),
       maybeSwap<IntPtrT>(FunctionPtr),
       // TODO: Value profiling is not yet supported.
       /*ValuesPtr=*/maybeSwap<IntPtrT>(0),
       maybeSwap<uint32_t>(NumCounters),
       /*NumValueSites=*/{maybeSwap<uint16_t>(0), maybeSwap<uint16_t>(0)},
+      // TODO: MC/DC is not yet supported.
+      /*NumBitmapBytes=*/maybeSwap<uint32_t>(0),
   });
   NamesVec.push_back(FunctionName.str());
 }
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index a920a31d0a4b22..31d3ff2bcbf6ca 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -434,6 +434,29 @@ Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
     Record.Counts.push_back(Count);
   }

+  // Bitmap byte information is indicated with a special character.
+  if (Line->startswith("$")) {
+    Record.BitmapBytes.clear();
+    // Read the number of bitmap bytes.
+    uint64_t NumBitmapBytes;
+    if ((Line++)->drop_front(1).trim().getAsInteger(0, NumBitmapBytes))
+      return error(instrprof_error::malformed,
+                   "number of bitmap bytes is not a valid integer");
+    if (NumBitmapBytes != 0) {
+      // Read each bitmap and fill our internal storage with the values.
+      Record.BitmapBytes.reserve(NumBitmapBytes);
+      for (uint64_t I = 0; I < NumBitmapBytes; ++I) {
+        if (Line.is_at_end())
+          return error(instrprof_error::truncated);
+        uint8_t BitmapByte;
+        if ((Line++)->getAsInteger(0, BitmapByte))
+          return error(instrprof_error::malformed,
+                       "bitmap byte is not a valid integer");
+        Record.BitmapBytes.push_back(BitmapByte);
+      }
+    }
+  }
+
   // Check if value profile data exists and read it if so.
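For reference, a hypothetical text-format record exercising this `$` section could look as follows; the `#` lines are comments the reader skips, and all values are invented:

  main
  # Func Hash:
  1234
  # Num Counters:
  2
  # Counter Values:
  9
  1
  # Num Bitmap Bytes:
  $2
  # Bitmap Byte Values:
  0x1
  0x2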
if (Error E = readValueProfileData(Record)) return error(std::move(E)); @@ -550,11 +573,14 @@ Error RawInstrProfReader::readHeader( return error(instrprof_error::bad_header); CountersDelta = swap(Header.CountersDelta); + BitmapDelta = swap(Header.BitmapDelta); NamesDelta = swap(Header.NamesDelta); auto NumData = swap(Header.NumData); auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); auto CountersSize = swap(Header.NumCounters) * getCounterTypeSize(); auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); + auto NumBitmapBytes = swap(Header.NumBitmapBytes); + auto PaddingBytesAfterBitmapBytes = swap(Header.PaddingBytesAfterBitmapBytes); auto NamesSize = swap(Header.NamesSize); ValueKindLast = swap(Header.ValueKindLast); @@ -564,8 +590,10 @@ Error RawInstrProfReader::readHeader( // Profile data starts after profile header and binary ids if exist. ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize; ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters; - ptrdiff_t NamesOffset = + ptrdiff_t BitmapOffset = CountersOffset + CountersSize + PaddingBytesAfterCounters; + ptrdiff_t NamesOffset = + BitmapOffset + NumBitmapBytes + PaddingBytesAfterBitmapBytes; ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; auto *Start = reinterpret_cast(&Header); @@ -594,6 +622,8 @@ Error RawInstrProfReader::readHeader( reinterpret_cast(&Header) + sizeof(RawInstrProf::Header); CountersStart = Start + CountersOffset; CountersEnd = CountersStart + CountersSize; + BitmapStart = Start + BitmapOffset; + BitmapEnd = BitmapStart + NumBitmapBytes; ValueDataStart = reinterpret_cast(Start + ValueDataOffset); const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd(); @@ -684,6 +714,49 @@ Error RawInstrProfReader::readRawCounts( return success(); } +template +Error RawInstrProfReader::readRawBitmapBytes(InstrProfRecord &Record) { + uint32_t NumBitmapBytes = swap(Data->NumBitmapBytes); + + Record.BitmapBytes.clear(); + Record.BitmapBytes.reserve(NumBitmapBytes); + + // It's possible MCDC is either not enabled or only used for some functions + // and not others. So if we record 0 bytes, just move on. + if (NumBitmapBytes == 0) + return success(); + + // BitmapDelta decreases as we advance to the next data record. 
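The invariant behind that comment: the header stores BitmapDelta = BitmapBegin - DataBegin, each record's on-disk BitmapPtr was emitted relative to that record's own address, and advancing a record subtracts sizeof(*Data) from the delta, so the subtraction below always yields a section-relative byte offset. A small self-contained check of the arithmetic, using made-up addresses and a made-up record size:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Made-up layout: 48-byte data records at 0x1000, bitmap bytes at 0x2000.
    const uint64_t RecordSize = 48, DataBegin = 0x1000, BitmapBegin = 0x2000;
    const uint64_t SectionOffsets[2] = {0, 8}; // each function's bitmap offset
    uint64_t BitmapDelta = BitmapBegin - DataBegin; // as stored in the header
    for (int I = 0; I < 2; ++I) {
      uint64_t RecordAddr = DataBegin + I * RecordSize;
      // What instrumentation stored in this record's BitmapPtr field:
      uint64_t BitmapPtr = (BitmapBegin + SectionOffsets[I]) - RecordAddr;
      std::printf("record %d -> offset %llu\n", I,
                  (unsigned long long)(BitmapPtr - BitmapDelta));
      BitmapDelta -= RecordSize; // mirrors the advance() adjustment above
    }
  }

It prints offsets 0 and 8, the positions assigned to the two functions' bitmap bytes in this toy layout.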
+  ptrdiff_t BitmapOffset = swap(Data->BitmapPtr) - BitmapDelta;
+  if (BitmapOffset < 0)
+    return error(
+        instrprof_error::malformed,
+        ("bitmap offset " + Twine(BitmapOffset) + " is negative").str());
+
+  if (BitmapOffset >= BitmapEnd - BitmapStart)
+    return error(instrprof_error::malformed,
+                 ("bitmap offset " + Twine(BitmapOffset) +
+                  " is greater than the maximum bitmap offset " +
+                  Twine(BitmapEnd - BitmapStart - 1))
+                     .str());
+
+  uint64_t MaxNumBitmapBytes =
+      (BitmapEnd - (BitmapStart + BitmapOffset)) / sizeof(uint8_t);
+  if (NumBitmapBytes > MaxNumBitmapBytes)
+    return error(instrprof_error::malformed,
+                 ("number of bitmap bytes " + Twine(NumBitmapBytes) +
+                  " is greater than the maximum number of bitmap bytes " +
+                  Twine(MaxNumBitmapBytes))
+                     .str());
+
+  for (uint32_t I = 0; I < NumBitmapBytes; I++) {
+    const char *Ptr = BitmapStart + BitmapOffset + I;
+    Record.BitmapBytes.push_back(swap(*Ptr));
+  }
+
+  return success();
+}
+
 template <class IntPtrT>
 Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
     InstrProfRecord &Record) {
@@ -734,6 +807,10 @@ Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record)
   if (Error E = readRawCounts(Record))
     return error(std::move(E));

+  // Read raw bitmap bytes and set Record.
+  if (Error E = readRawBitmapBytes(Record))
+    return error(std::move(E));
+
   // Read value data and set Record.
   if (Error E = readValueProfilingData(Record))
     return error(std::move(E));
@@ -795,6 +872,7 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
   DataBuffer.clear();
   std::vector<uint64_t> CounterBuffer;
+  std::vector<uint8_t> BitmapByteBuffer;

   const unsigned char *End = D + N;
   while (D < End) {
@@ -823,7 +901,26 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
       CounterBuffer.push_back(
           endian::readNext<uint64_t, little, unaligned>(D));

-    DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer));
+    // Read bitmap bytes for GET_VERSION(FormatVersion) > 10.
+    if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version10) {
+      uint64_t BitmapBytes = 0;
+      if (D + sizeof(uint64_t) > End)
+        return data_type();
+      BitmapBytes =
+          endian::readNext<uint64_t, little, unaligned>(D);
+      // Read bitmap byte values.
+      if (D + BitmapBytes * sizeof(uint64_t) > End)
+        return data_type();
+      BitmapByteBuffer.clear();
+      BitmapByteBuffer.reserve(BitmapBytes);
+      for (uint64_t J = 0; J < BitmapBytes; ++J)
+        BitmapByteBuffer.push_back(static_cast<uint8_t>(
+            endian::readNext<uint64_t, little, unaligned>(D)));
+    }
+
+    DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer),
+                            std::move(BitmapByteBuffer));

     // Read value profiling data.
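The loop above consumes indexed-format (Version11) record bodies laid out as: hash, counter count, counters, bitmap byte count, then the bitmap bytes, each widened to a little-endian u64 on disk, which is what the writer's size accounting below also assumes. A sketch of a serializer for just that body (value-profile tail and OnDiskHashTable framing omitted; this is not the real writer):

  #include <cstdint>
  #include <vector>

  // Append a little-endian u64 to Buf.
  static void writeU64(std::vector<uint8_t> &Buf, uint64_t V) {
    for (int I = 0; I < 8; ++I)
      Buf.push_back(static_cast<uint8_t>(V >> (8 * I)));
  }

  // One record body per (name, hash) pair in the Version11 hash table.
  std::vector<uint8_t> serializeRecord(uint64_t Hash,
                                       const std::vector<uint64_t> &Counts,
                                       const std::vector<uint8_t> &BitmapBytes) {
    std::vector<uint8_t> Buf;
    writeU64(Buf, Hash);
    writeU64(Buf, Counts.size());
    for (uint64_t C : Counts)
      writeU64(Buf, C);
    writeU64(Buf, BitmapBytes.size());
    for (uint8_t B : BitmapBytes)
      writeU64(Buf, B); // each bitmap byte is widened to 64 bits on disk
    return Buf;
  }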
if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && @@ -1337,6 +1434,16 @@ Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, return success(); } +Error IndexedInstrProfReader::getFunctionBitmapBytes( + StringRef FuncName, uint64_t FuncHash, std::vector &BitmapBytes) { + Expected Record = getInstrProfRecord(FuncName, FuncHash); + if (Error E = Record.takeError()) + return error(std::move(E)); + + BitmapBytes = Record.get().BitmapBytes; + return success(); +} + Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { ArrayRef Data; diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 6892654b00ea46..85339b39c74c4c 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -132,6 +132,8 @@ class InstrProfRecordWriterTrait { M += sizeof(uint64_t); // The function hash M += sizeof(uint64_t); // The size of the Counts vector M += ProfRecord.Counts.size() * sizeof(uint64_t); + M += sizeof(uint64_t); // The size of the Bitmap vector + M += ProfRecord.BitmapBytes.size() * sizeof(uint64_t); // Value data M += ValueProfData::getSize(ProfileData.second); @@ -161,6 +163,10 @@ class InstrProfRecordWriterTrait { for (uint64_t I : ProfRecord.Counts) LE.write(I); + LE.write(ProfRecord.BitmapBytes.size()); + for (uint64_t I : ProfRecord.BitmapBytes) + LE.write(I); + // Write value data std::unique_ptr VDataPtr = ValueProfData::serializeFrom(ProfileData.second); @@ -380,6 +386,8 @@ bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { const InstrProfRecord &IPR = Func.second; if (llvm::any_of(IPR.Counts, [](uint64_t Count) { return Count > 0; })) return true; + if (llvm::any_of(IPR.BitmapBytes, [](uint8_t Byte) { return Byte > 0; })) + return true; } return false; } @@ -703,6 +711,17 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash, for (uint64_t Count : Func.Counts) OS << Count << "\n"; + if (Func.BitmapBytes.size() > 0) { + OS << "# Num Bitmap Bytes:\n$" << Func.BitmapBytes.size() << "\n"; + OS << "# Bitmap Byte Values:\n"; + for (uint8_t Byte : Func.BitmapBytes) { + OS << "0x"; + OS.write_hex(Byte); + OS << "\n"; + } + OS << "\n"; + } + uint32_t NumValueKinds = Func.getNumValueKinds(); if (!NumValueKinds) { OS << "\n"; diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 57fcfd53836911..55eef2b76e9be2 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -432,6 +432,15 @@ bool InstrProfiling::lowerIntrinsics(Function *F) { } else if (auto *IPVP = dyn_cast(&Instr)) { lowerValueProfileInst(IPVP); MadeChange = true; + } else if (auto *IPMP = dyn_cast(&Instr)) { + IPMP->eraseFromParent(); + MadeChange = true; + } else if (auto *IPBU = dyn_cast(&Instr)) { + lowerMCDCTestVectorBitmapUpdate(IPBU); + MadeChange = true; + } else if (auto *IPTU = dyn_cast(&Instr)) { + lowerMCDCCondBitmapUpdate(IPTU); + MadeChange = true; } } } @@ -546,19 +555,33 @@ bool InstrProfiling::run( // the instrumented function. This is counting the number of instrumented // target value sites to enter it as field in the profile data variable. 
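Worth keeping in mind before the pass changes that follow: when records for the same function are merged, counters are summed while bitmap bytes are ORed (the merge loop added to InstrProfRecord::merge earlier in this patch), since a set bit only means the test vector executed at least once. A toy version of that rule, assuming equal sizes where the real code would report count or bitmap mismatch errors:

  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // Sketch of the merge rule only: counters accumulate, bitmap bytes union.
  void mergeFunctionProfile(std::vector<uint64_t> &Counts,
                            std::vector<uint8_t> &BitmapBytes,
                            const std::vector<uint64_t> &OtherCounts,
                            const std::vector<uint8_t> &OtherBitmapBytes) {
    for (size_t I = 0, E = Counts.size(); I < E; ++I)
      Counts[I] += OtherCounts[I];           // saturation handling elided
    for (size_t I = 0, E = BitmapBytes.size(); I < E; ++I)
      BitmapBytes[I] |= OtherBitmapBytes[I]; // bit set if seen in either run
  }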
for (Function &F : M) { - InstrProfInstBase *FirstProfInst = nullptr; - for (BasicBlock &BB : F) - for (auto I = BB.begin(), E = BB.end(); I != E; I++) + InstrProfCntrInstBase *FirstProfInst = nullptr; + InstrProfMCDCBitmapParameters *FirstProfMCDCParams = nullptr; + for (BasicBlock &BB : F) { + for (auto I = BB.begin(), E = BB.end(); I != E; I++) { if (auto *Ind = dyn_cast(I)) computeNumValueSiteCounts(Ind); - else if (FirstProfInst == nullptr && - (isa(I) || isa(I))) - FirstProfInst = dyn_cast(I); + else { + if (FirstProfInst == nullptr && + (isa(I) || isa(I))) + FirstProfInst = dyn_cast(I); + if (FirstProfMCDCParams == nullptr) + FirstProfMCDCParams = dyn_cast(I); + } + } + } + + // If the MCDCBitmapParameters intrinsic was seen, create the bitmaps. + if (FirstProfMCDCParams != nullptr) { + static_cast(getOrCreateRegionBitmaps(FirstProfMCDCParams)); + } - // Value profiling intrinsic lowering requires per-function profile data - // variable to be created first. - if (FirstProfInst != nullptr) + // Use a profile intrinsic to create the region counters and data variable. + // Also create the data variable based on the MCDCParams. + if (FirstProfInst != nullptr) { static_cast(getOrCreateRegionCounters(FirstProfInst)); + createDataVariable(FirstProfInst, FirstProfMCDCParams); + } } for (Function &F : M) @@ -672,7 +695,7 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Ind->eraseFromParent(); } -Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) { +Value *InstrProfiling::getCounterAddress(InstrProfCntrInstBase *I) { auto *Counters = getOrCreateRegionCounters(I); IRBuilder<> Builder(I); @@ -712,6 +735,25 @@ Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) { return Builder.CreateIntToPtr(Add, Addr->getType()); } +Value *InstrProfiling::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) { + auto *Bitmaps = getOrCreateRegionBitmaps(I); + IRBuilder<> Builder(I); + + auto *Addr = Builder.CreateConstInBoundsGEP2_32( + Bitmaps->getValueType(), Bitmaps, 0, I->getBitmapIndex()->getZExtValue()); + + if (isRuntimeCounterRelocationEnabled()) { + LLVMContext &Ctx = M->getContext(); + Ctx.diagnose(DiagnosticInfoPGOProfile( + M->getName().data(), + Twine("Runtime counter relocation is presently not supported for MC/DC " + "bitmaps."), + DS_Warning)); + } + + return Addr; +} + void InstrProfiling::lowerCover(InstrProfCoverInst *CoverInstruction) { auto *Addr = getCounterAddress(CoverInstruction); IRBuilder<> Builder(CoverInstruction); @@ -771,6 +813,86 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) { CoverageNamesVar->eraseFromParent(); } +void InstrProfiling::lowerMCDCTestVectorBitmapUpdate( + InstrProfMCDCTVBitmapUpdate *Update) { + IRBuilder<> Builder(Update); + auto *Int8Ty = Type::getInt8Ty(M->getContext()); + auto *Int8PtrTy = Type::getInt8PtrTy(M->getContext()); + auto *Int32Ty = Type::getInt32Ty(M->getContext()); + auto *Int64Ty = Type::getInt64Ty(M->getContext()); + auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr(); + auto *BitmapAddr = getBitmapAddress(Update); + + // Load Temp Val. + // %mcdc.temp = load i32, ptr %mcdc.addr, align 4 + auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"); + + // Calculate byte offset using div8. + // %1 = lshr i32 %mcdc.temp, 3 + auto *BitmapByteOffset = Builder.CreateLShr(Temp, 0x3); + + // Add byte offset to section base byte address. 
+  // %2 = zext i32 %1 to i64
+  // %3 = add i64 ptrtoint (ptr @__profbm_test to i64), %2
+  auto *BitmapByteAddr =
+      Builder.CreateAdd(Builder.CreatePtrToInt(BitmapAddr, Int64Ty),
+                        Builder.CreateZExtOrBitCast(BitmapByteOffset, Int64Ty));
+
+  // Convert to a pointer.
+  // %4 = inttoptr i64 %3 to ptr
+  BitmapByteAddr = Builder.CreateIntToPtr(BitmapByteAddr, Int8PtrTy);
+
+  // Calculate bit offset into bitmap byte by using div8 remainder (AND 0x7).
+  // %5 = and i32 %mcdc.temp, 7
+  // %6 = trunc i32 %5 to i8
+  auto *BitToSet = Builder.CreateTrunc(Builder.CreateAnd(Temp, 0x7), Int8Ty);
+
+  // Shift bit offset left to form a bitmap.
+  // %7 = shl i8 1, %6
+  auto *ShiftedVal = Builder.CreateShl(Builder.getInt8(0x1), BitToSet);
+
+  // Load profile bitmap byte.
+  // %mcdc.bits = load i8, ptr %4, align 1
+  auto *Bitmap = Builder.CreateLoad(Int8Ty, BitmapByteAddr, "mcdc.bits");
+
+  // Perform logical OR of profile bitmap byte and shifted bit offset.
+  // %8 = or i8 %mcdc.bits, %7
+  auto *Result = Builder.CreateOr(Bitmap, ShiftedVal);
+
+  // Store the updated profile bitmap byte.
+  // store i8 %8, ptr %4, align 1
+  Builder.CreateStore(Result, BitmapByteAddr);
+  Update->eraseFromParent();
+}
+
+void InstrProfiling::lowerMCDCCondBitmapUpdate(
+    InstrProfMCDCCondBitmapUpdate *Update) {
+  IRBuilder<> Builder(Update);
+  auto *Int32Ty = Type::getInt32Ty(M->getContext());
+  auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
+
+  // Load the MCDC temporary value from the stack.
+  // %mcdc.temp = load i32, ptr %mcdc.addr, align 4
+  auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp");
+
+  // Zero-extend the evaluated condition boolean value (0 or 1) to 32 bits.
+  // %1 = zext i1 %tobool to i32
+  auto *CondV_32 = Builder.CreateZExt(Update->getCondBool(), Int32Ty);
+
+  // Shift the boolean value left (by the condition's ID) to form a bitmap.
+  // %2 = shl i32 %1, <condition ID returned by getCondID()>
+  auto *ShiftedVal = Builder.CreateShl(CondV_32, Update->getCondID());
+
+  // Perform logical OR of the bitmap against the loaded MCDC temporary value.
+  // %3 = or i32 %mcdc.temp, %2
+  auto *Result = Builder.CreateOr(Temp, ShiftedVal);
+
+  // Store the updated temporary value back to the stack.
+  // store i32 %3, ptr %mcdc.addr, align 4
+  Builder.CreateStore(Result, MCDCCondBitmapAddr);
+  Update->eraseFromParent();
+}
+
 /// Get the name of a profiling variable for a particular function.
 static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
                               bool &Renamed) {
@@ -926,37 +1048,31 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
   return true;
 }
 
-GlobalVariable *
-InstrProfiling::createRegionCounters(InstrProfInstBase *Inc, StringRef Name,
-                                     GlobalValue::LinkageTypes Linkage) {
-  uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
-  auto &Ctx = M->getContext();
-  GlobalVariable *GV;
-  if (isa<InstrProfCoverInst>(Inc)) {
-    auto *CounterTy = Type::getInt8Ty(Ctx);
-    auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters);
-    // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
- std::vector InitialValues(NumCounters, - Constant::getAllOnesValue(CounterTy)); - GV = new GlobalVariable(*M, CounterArrTy, false, Linkage, - ConstantArray::get(CounterArrTy, InitialValues), - Name); - GV->setAlignment(Align(1)); - } else { - auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); - GV = new GlobalVariable(*M, CounterTy, false, Linkage, - Constant::getNullValue(CounterTy), Name); - GV->setAlignment(Align(8)); - } - return GV; +void InstrProfiling::maybeSetComdat(GlobalVariable *GV, Function *Fn, + StringRef VarName) { + bool DataReferencedByCode = profDataReferencedByCode(*M); + bool NeedComdat = needsComdatForCounter(*Fn, *M); + bool UseComdat = (NeedComdat || TT.isOSBinFormatELF()); + + if (!UseComdat) + return; + + StringRef GroupName = + TT.isOSBinFormatCOFF() && DataReferencedByCode ? GV->getName() : VarName; + Comdat *C = M->getOrInsertComdat(GroupName); + if (!NeedComdat) + C->setSelectionKind(Comdat::NoDeduplicate); + GV->setComdat(C); + // COFF doesn't allow the comdat group leader to have private linkage, so + // upgrade private linkage to internal linkage to produce a symbol table + // entry. + if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage()) + GV->setLinkage(GlobalValue::InternalLinkage); } -GlobalVariable * -InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { +GlobalVariable *InstrProfiling::setupProfileSection(InstrProfInstBase *Inc, + InstrProfSectKind IPSK) { GlobalVariable *NamePtr = Inc->getName(); - auto &PD = ProfileDataMap[NamePtr]; - if (PD.RegionCounters) - return PD.RegionCounters; // Match the linkage and visibility of the name global. Function *Fn = Inc->getParent()->getParent(); @@ -995,42 +1111,100 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { // nodeduplicate COMDAT which is lowered to a zero-flag section group. This // allows -z start-stop-gc to discard the entire group when the function is // discarded. - bool DataReferencedByCode = profDataReferencedByCode(*M); - bool NeedComdat = needsComdatForCounter(*Fn, *M); bool Renamed; - std::string CntsVarName = - getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed); - std::string DataVarName = - getVarName(Inc, getInstrProfDataVarPrefix(), Renamed); - auto MaybeSetComdat = [&](GlobalVariable *GV) { - bool UseComdat = (NeedComdat || TT.isOSBinFormatELF()); - if (UseComdat) { - StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode - ? GV->getName() - : CntsVarName; - Comdat *C = M->getOrInsertComdat(GroupName); - if (!NeedComdat) - C->setSelectionKind(Comdat::NoDeduplicate); - GV->setComdat(C); - // COFF doesn't allow the comdat group leader to have private linkage, so - // upgrade private linkage to internal linkage to produce a symbol table - // entry. 
- if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage()) - GV->setLinkage(GlobalValue::InternalLinkage); - } - }; + GlobalVariable *Ptr; + StringRef VarPrefix; + std::string VarName; + if (IPSK == IPSK_cnts) { + VarPrefix = getInstrProfCountersVarPrefix(); + VarName = getVarName(Inc, VarPrefix, Renamed); + InstrProfCntrInstBase *CntrIncrement = dyn_cast(Inc); + Ptr = createRegionCounters(CntrIncrement, VarName, Linkage); + } else if (IPSK == IPSK_bitmap) { + VarPrefix = getInstrProfBitmapVarPrefix(); + VarName = getVarName(Inc, VarPrefix, Renamed); + InstrProfMCDCBitmapInstBase *BitmapUpdate = + dyn_cast(Inc); + Ptr = createRegionBitmaps(BitmapUpdate, VarName, Linkage); + } else { + llvm_unreachable("Profile Section must be for Counters or Bitmaps"); + } + + Ptr->setVisibility(Visibility); + // Put the counters and bitmaps in their own sections so linkers can + // remove unneeded sections. + Ptr->setSection(getInstrProfSectionName(IPSK, TT.getObjectFormat())); + Ptr->setLinkage(Linkage); + maybeSetComdat(Ptr, Fn, VarName); + return Ptr; +} + +GlobalVariable * +InstrProfiling::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc, + StringRef Name, + GlobalValue::LinkageTypes Linkage) { + uint64_t NumBytes = Inc->getNumBitmapBytes()->getZExtValue(); + auto *BitmapTy = ArrayType::get(Type::getInt8Ty(M->getContext()), NumBytes); + auto GV = new GlobalVariable(*M, BitmapTy, false, Linkage, + Constant::getNullValue(BitmapTy), Name); + GV->setAlignment(Align(1)); + return GV; +} + +GlobalVariable * +InstrProfiling::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) { + GlobalVariable *NamePtr = Inc->getName(); + auto &PD = ProfileDataMap[NamePtr]; + if (PD.RegionBitmaps) + return PD.RegionBitmaps; + + // If RegionBitmaps doesn't already exist, create it by first setting up + // the corresponding profile section. + auto *BitmapPtr = setupProfileSection(Inc, IPSK_bitmap); + PD.RegionBitmaps = BitmapPtr; + return PD.RegionBitmaps; +} +GlobalVariable * +InstrProfiling::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name, + GlobalValue::LinkageTypes Linkage) { uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); - LLVMContext &Ctx = M->getContext(); + auto &Ctx = M->getContext(); + GlobalVariable *GV; + if (isa(Inc)) { + auto *CounterTy = Type::getInt8Ty(Ctx); + auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters); + // TODO: `Constant::getAllOnesValue()` does not yet accept an array type. + std::vector InitialValues(NumCounters, + Constant::getAllOnesValue(CounterTy)); + GV = new GlobalVariable(*M, CounterArrTy, false, Linkage, + ConstantArray::get(CounterArrTy, InitialValues), + Name); + GV->setAlignment(Align(1)); + } else { + auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); + GV = new GlobalVariable(*M, CounterTy, false, Linkage, + Constant::getNullValue(CounterTy), Name); + GV->setAlignment(Align(8)); + } + return GV; +} + +GlobalVariable * +InstrProfiling::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) { + GlobalVariable *NamePtr = Inc->getName(); + auto &PD = ProfileDataMap[NamePtr]; + if (PD.RegionCounters) + return PD.RegionCounters; - auto *CounterPtr = createRegionCounters(Inc, CntsVarName, Linkage); - CounterPtr->setVisibility(Visibility); - CounterPtr->setSection( - getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat())); - CounterPtr->setLinkage(Linkage); - MaybeSetComdat(CounterPtr); + // If RegionCounters doesn't already exist, create it by first setting up + // the corresponding profile section. 
+ auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts); PD.RegionCounters = CounterPtr; + if (DebugInfoCorrelate) { + LLVMContext &Ctx = M->getContext(); + Function *Fn = Inc->getParent()->getParent(); if (auto *SP = Fn->getSubprogram()) { DIBuilder DB(*M, true, SP->getUnit()); Metadata *FunctionNameAnnotation[] = { @@ -1059,8 +1233,50 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { CounterPtr->addDebugInfo(DICounter); DB.finalize(); } + + // Mark the counter variable as used so that it isn't optimized out. + CompilerUsedVars.push_back(PD.RegionCounters); } + return PD.RegionCounters; +} + +void InstrProfiling::createDataVariable(InstrProfCntrInstBase *Inc, + InstrProfMCDCBitmapParameters *Params) { + // When debug information is correlated to profile data, a data variable + // is not needed. + if (DebugInfoCorrelate) + return; + + GlobalVariable *NamePtr = Inc->getName(); + auto &PD = ProfileDataMap[NamePtr]; + + LLVMContext &Ctx = M->getContext(); + + Function *Fn = Inc->getParent()->getParent(); + GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage(); + GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility(); + + // Due to the limitation of binder as of 2021/09/28, the duplicate weak + // symbols in the same csect won't be discarded. When there are duplicate weak + // symbols, we can NOT guarantee that the relocations get resolved to the + // intended weak symbol, so we can not ensure the correctness of the relative + // CounterPtr, so we have to use private linkage for counter and data symbols. + if (TT.isOSBinFormatXCOFF()) { + Linkage = GlobalValue::PrivateLinkage; + Visibility = GlobalValue::DefaultVisibility; + } + + bool DataReferencedByCode = profDataReferencedByCode(*M); + bool NeedComdat = needsComdatForCounter(*Fn, *M); + bool Renamed; + + // The Data Variable section is anchored to profile counters. + std::string CntsVarName = + getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed); + std::string DataVarName = + getVarName(Inc, getInstrProfDataVarPrefix(), Renamed); + auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); // Allocate statically the array of pointers to value profile nodes for // the current function. @@ -1078,16 +1294,17 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { ValuesVar->setSection( getInstrProfSectionName(IPSK_vals, TT.getObjectFormat())); ValuesVar->setAlignment(Align(8)); - MaybeSetComdat(ValuesVar); + maybeSetComdat(ValuesVar, Fn, CntsVarName); ValuesPtrExpr = ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx)); } - if (DebugInfoCorrelate) { - // Mark the counter variable as used so that it isn't optimized out. - CompilerUsedVars.push_back(PD.RegionCounters); - return PD.RegionCounters; - } + uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); + auto *CounterPtr = PD.RegionCounters; + + uint64_t NumBitmapBytes = 0; + if (Params != nullptr) + NumBitmapBytes = Params->getNumBitmapBytes()->getZExtValue(); // Create data variable. auto *IntPtrTy = M->getDataLayout().getIntPtrType(M->getContext()); @@ -1130,6 +1347,16 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy), ConstantExpr::getPtrToInt(Data, IntPtrTy)); + // Bitmaps are relative to the same data variable as profile counters. 
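For orientation, the record being assembled here has the following approximate shape; this C++ struct is an illustration only, the authoritative field list is generated from the INSTR_PROF_DATA entries earlier in this patch, and pointer-typed fields are shown as integers:

  #include <cstdint>

  // Field order mirrors the INSTR_PROF_DATA entries earlier in this patch.
  struct ProfDataApprox {
    uint64_t NameRef;          // MD5 hash of the function name
    uint64_t FuncHash;         // structural hash of the function
    int64_t CounterPtr;        // __profc_* address minus this record's address
    int64_t BitmapPtr;         // __profbm_* address minus this record's address
    uint64_t FunctionPtr;      // for indirect-call target mapping
    uint64_t ValuesPtr;        // value-profile nodes, if any
    uint32_t NumCounters;
    uint16_t NumValueSites[2]; // IPVK_Last + 1 == 2 value kinds today
    uint32_t NumBitmapBytes;   // new: MC/DC bitmap size for this function
  };

It is the { i64, i64, i64, i64, ptr, ptr, i32, [2 x i16], i32 } layout that the updated comdat_internal.ll test checks below.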
+ GlobalVariable *BitmapPtr = PD.RegionBitmaps; + Constant *RelativeBitmapPtr = ConstantInt::get(IntPtrTy, 0); + + if (BitmapPtr != nullptr) { + RelativeBitmapPtr = + ConstantExpr::getSub(ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy), + ConstantExpr::getPtrToInt(Data, IntPtrTy)); + } + Constant *DataVals[] = { #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init, #include "llvm/ProfileData/InstrProfData.inc" @@ -1139,7 +1366,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { Data->setVisibility(Visibility); Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat())); Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT)); - MaybeSetComdat(Data); + maybeSetComdat(Data, Fn, CntsVarName); PD.DataVar = Data; @@ -1151,8 +1378,6 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { NamePtr->setLinkage(GlobalValue::PrivateLinkage); // Collect the referenced names to be used by emitNameData. ReferencedNames.push_back(NamePtr); - - return PD.RegionCounters; } void InstrProfiling::emitVNodes() { diff --git a/llvm/test/Instrumentation/InstrProfiling/mcdc.ll b/llvm/test/Instrumentation/InstrProfiling/mcdc.ll new file mode 100644 index 00000000000000..fccb026c25bf26 --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/mcdc.ll @@ -0,0 +1,53 @@ +; Check that MC/DC intrinsics are properly lowered +; RUN: opt < %s -passes=instrprof -S | FileCheck %s +; RUN: opt < %s -passes=instrprof -runtime-counter-relocation -S 2>&1 | FileCheck %s --check-prefix RELOC + +; RELOC: Runtime counter relocation is presently not supported for MC/DC bitmaps + +target triple = "x86_64-unknown-linux-gnu" + +@__profn_test = private constant [4 x i8] c"test" + +; CHECK: @__profbm_test = private global [1 x i8] zeroinitializer, section "__llvm_prf_bits", comdat, align 1 + +define dso_local void @test(i32 noundef %A) { +entry: + %A.addr = alloca i32, align 4 + %mcdc.addr = alloca i32, align 4 + call void @llvm.instrprof.cover(ptr @__profn_test, i64 99278, i32 5, i32 0) + ; CHECK: store i8 0, ptr @__profc_test, align 1 + + call void @llvm.instrprof.mcdc.parameters(ptr @__profn_test, i64 99278, i32 1) + store i32 0, ptr %mcdc.addr, align 4 + %0 = load i32, ptr %A.addr, align 4 + %tobool = icmp ne i32 %0, 0 + + call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 99278, i32 0, ptr %mcdc.addr, i1 %tobool) + ; CHECK: %mcdc.temp = load i32, ptr %mcdc.addr, align 4 + ; CHECK-NEXT: %1 = zext i1 %tobool to i32 + ; CHECK-NEXT: %2 = shl i32 %1, 0 + ; CHECK-NEXT: %3 = or i32 %mcdc.temp, %2 + ; CHECK-NEXT: store i32 %3, ptr %mcdc.addr, align 4 + + call void @llvm.instrprof.mcdc.tvbitmap.update(ptr @__profn_test, i64 99278, i32 1, i32 0, ptr %mcdc.addr) + ; CHECK: %mcdc.temp1 = load i32, ptr %mcdc.addr, align 4 + ; CHECK-NEXT: %4 = lshr i32 %mcdc.temp1, 3 + ; CHECK-NEXT: %5 = zext i32 %4 to i64 + ; CHECK-NEXT: %6 = add i64 ptrtoint (ptr @__profbm_test to i64), %5 + ; CHECK-NEXT: %7 = inttoptr i64 %6 to ptr + ; CHECK-NEXT: %8 = and i32 %mcdc.temp1, 7 + ; CHECK-NEXT: %9 = trunc i32 %8 to i8 + ; CHECK-NEXT: %10 = shl i8 1, %9 + ; CHECK-NEXT: %mcdc.bits = load i8, ptr %7, align 1 + ; CHECK-NEXT: %11 = or i8 %mcdc.bits, %10 + ; CHECK-NEXT: store i8 %11, ptr %7, align 1 + ret void +} + +declare void @llvm.instrprof.cover(ptr, i64, i32, i32) + +declare void @llvm.instrprof.mcdc.parameters(ptr, i64, i32) + +declare void @llvm.instrprof.mcdc.condbitmap.update(ptr, i64, i32, ptr, i1) + +declare void @llvm.instrprof.mcdc.tvbitmap.update(ptr, i64, i32, i32, ptr) diff --git 
a/llvm/test/Transforms/PGOProfile/comdat_internal.ll b/llvm/test/Transforms/PGOProfile/comdat_internal.ll index 1c44a274f3c047..8c6942c0f527bc 100644 --- a/llvm/test/Transforms/PGOProfile/comdat_internal.ll +++ b/llvm/test/Transforms/PGOProfile/comdat_internal.ll @@ -13,9 +13,9 @@ $foo = comdat any ; CHECK: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat ; CHECK-NOT: __profn__stdin__foo ; CHECK: @__profc__stdin__foo.[[#FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8 -; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, ptr, ptr, i32, [2 x i16] } { i64 {{.*}}, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), ptr null +; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, i64, ptr, ptr, i32, [2 x i16], i32 } { i64 {{.*}}, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), i64 0, ptr null ; CHECK-NOT: @foo -; CHECK-SAME: , ptr null, i32 1, [2 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8 +; CHECK-SAME: , ptr null, i32 1, [2 x i16] zeroinitializer, i32 0 }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8 ; CHECK: @__llvm_prf_nm ; CHECK: @llvm.compiler.used diff --git a/llvm/test/tools/llvm-profdata/Inputs/basic.profraw b/llvm/test/tools/llvm-profdata/Inputs/basic.profraw index ad88759398c6020f4ab8a5606258e69d98e36687..1b284b84fad6dd7f9407b1c3b99cb178af0e09c6 100644 GIT binary patch delta 63 zcmbQicz}_!u_!ISs37M*=R{6@Muv%+@_Y__aks4+{{I)d(5P~>exjd}00R^}C!MPiq_)7#Jp|DQPSa2C)!;5z2(oEDX7cnRyHh E0CJEFxc~qF diff --git a/llvm/test/tools/llvm-profdata/Inputs/c-general.profraw b/llvm/test/tools/llvm-profdata/Inputs/c-general.profraw index bc8fc5db1cb154d98ca962e84313463e3298cb92..9cd225587c92511e99f3497ce1d5f47c6fc5f0af 100644 GIT binary patch delta 308 zcmeC+d%(}xSd^AuRFLzZb0Vky_8T_ z2Map_g$4d1%$=MM7A^z|M<5H&2MaF*3U?q0gIzKCB2aV#lIY~~%z~3Sm;_Lj)^mdV hG1&qro`I~m9;i42C}@Bz*w4f{If6-G^8%&}764*Dk$V6D delta 330 zcmaFB-@(V(Sd^AuRFLzZV$QVgXRX-d83sq200i+W)W}ZBd!q5+)yH6rcP6 zC=1puPjh&2EJ diff --git a/llvm/test/tools/llvm-profdata/Inputs/compat.profdata.v10 b/llvm/test/tools/llvm-profdata/Inputs/compat.profdata.v10 new file mode 100644 index 0000000000000000000000000000000000000000..c331e30b48ff5d3be2efe4636d0c9fee56e764b5 GIT binary patch literal 872 zcmeyLQ&5zjmf6U~fE@@hqlzb>@!6o#0#KR>O2d@>u3-d=z-Xv61B@@A4iSOzA*vY| z7PO)2gNZxvLQF>`)21b~& z3=Oc72^^Cruy9PAA;2N==RX)sp2#ddaX%*`!^DH)91h5W??Hke#W_5X1SiWg0!1e) zGKzETKo<4~3kL#)1^yz;om>wVZUhQPAPcVt3vUDpcOVIaT`~C~P;>*5=;Zs%f|Dhf o1W=XsbAtRa*#jt^fvmV6sJH?sXn-s@pNVsF29v<%4NMj+0Qs7eE&u=k delta 364 zcmdnM|AJSru_!ISs37M*2Ll8MOcYd~=pmqxDy0AxV1$ap532 zg3W7Lmwt5a%Luw`8X;yQ=%O*PQGDV94nY>r<189Si!*Iwboso6f2t<{#V0E;atMY_ zsnBGduNo&k$1~O>`Qe2MkVFDdqEPdcjat*B{dO76%_kSkWcUG+SOAo;_m#;@Xm_l> z_CIV#TU6$@gvkdP#V0=i%0w>dTG2Ucam2*OHvOM-SQvJIWfYh=1iwzTW$_M(6^_4X z6sG@NZp{yn4A5N?3QrGb?)?~3dEpwwUk3mFgTdqpOcD~cFPdW;(Y diff --git a/llvm/test/tools/llvm-profdata/binary-ids-padding.test b/llvm/test/tools/llvm-profdata/binary-ids-padding.test index 67db5c98ef323a..eda63203a304a4 100644 --- a/llvm/test/tools/llvm-profdata/binary-ids-padding.test +++ b/llvm/test/tools/llvm-profdata/binary-ids-padding.test @@ -5,13 +5,15 @@ // INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, 
__llvm_write_binary_ids(NULL)) // INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +// INSTR_PROF_RAW_HEADER(uint64_t, NumBitmaskBytes, NumBitmaskBytes) // INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) +// INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin) // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) RUN: printf '\201rforpl\377' > %t.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw // There will be 2 20-byte binary IDs, so the total Binary IDs size will be 64 bytes. // 2 * 8 binary ID sizes // + 2 * 20 binary IDs (of size 20) @@ -23,8 +25,11 @@ RUN: printf '\2\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\3\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\20\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw @@ -51,14 +56,18 @@ RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\067\265\035\031\112\165\023\344' >> %t.profraw RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw -RUN: printf '\xd8\xff\3\0\1\0\0\0' >> %t.profraw +RUN: printf '\xc8\xff\3\0\1\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\023\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\067\0\0\0\0\0\0\0' >> %t.profraw diff --git a/llvm/test/tools/llvm-profdata/compat.proftext b/llvm/test/tools/llvm-profdata/compat.proftext index 73321cc5e66d46..111fd419881974 100644 --- a/llvm/test/tools/llvm-profdata/compat.proftext +++ b/llvm/test/tools/llvm-profdata/compat.proftext @@ -87,3 +87,26 @@ large_numbers # FORMATV4: Total functions: 3 # FORMATV4: Maximum function count: 2305843009213693952 # FORMATV4: Maximum internal block count: 1152921504606846976 + +# RUN: llvm-profdata show %S/Inputs/compat.profdata.v10 -all-functions --counts | FileCheck %s -check-prefix=FORMATV10 + +# FORMATV10: Counters: +# FORMATV10: large_numbers: +# FORMATV10: Hash: 0x3fffffffffffffff +# FORMATV10: Counters: 6 +# FORMATV10: Function count: 2305843009213693952 +# FORMATV10: Block counts: [1152921504606846976, 576460752303423488, 288230376151711744, 144115188075855872, 72057594037927936] +# FORMATV10: name with spaces: +# FORMATV10: Hash: 0x0000000000000400 +# FORMATV10: Counters: 2 +# FORMATV10: Function count: 0 +# FORMATV10: Block counts: [0] +# FORMATV10: function_count_only: +# FORMATV10: Hash: 0x0000000000000000 +# FORMATV10: Counters: 1 +# FORMATV10: Function count: 97531 +# FORMATV10: Block counts: [] +# FORMATV10: Functions shown: 3 +# FORMATV10: Total functions: 3 +# FORMATV10: Maximum function count: 2305843009213693952 +# FORMATV10: 
Maximum internal block count: 1152921504606846976 diff --git a/llvm/test/tools/llvm-profdata/large-binary-id-size.test b/llvm/test/tools/llvm-profdata/large-binary-id-size.test index 2394431e94de48..38b838e0d100af 100644 --- a/llvm/test/tools/llvm-profdata/large-binary-id-size.test +++ b/llvm/test/tools/llvm-profdata/large-binary-id-size.test @@ -1,5 +1,5 @@ RUN: printf '\201rforpl\377' > %t.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\40\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw @@ -9,6 +9,9 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw // Check for a corrupted size being too large past the end of the file. RUN: printf '\7\7\7\7\7\7\7\7' >> %t.profraw diff --git a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test index 06f418d0235d26..c967e850dbe352 100644 --- a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test +++ b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test @@ -5,20 +5,25 @@ // INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) // INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +// INSTR_PROF_RAW_HEADER(uint64_t, NumBitmaskBytes, NumBitmaskBytes) // INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) +// INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin) // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) RUN: printf '\201rforpl\377' > %t.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw @@ -35,7 +40,9 @@ RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\023\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\3\0foo\0\0\0' >> %t.profraw diff --git a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test index b718cf0fd8e972..e1e33824bf2f88 100644 --- a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test +++ b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test @@ -5,20 +5,26 @@ // 
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) // INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +// INSTR_PROF_RAW_HEADER(uint64_t, NumBitmaskBytes, NumBitmaskBytes) // INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) +// INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin) // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) RUN: printf '\201rforpl\377' > %t.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw @@ -35,8 +41,10 @@ RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw // Make NumCounters = 0 so that we get "number of counters is zero" error message RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\023\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\3\0foo\0\0\0' >> %t.profraw diff --git a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test index 38e40334a6a690..3c23bc7dd0f7f9 100644 --- a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test +++ b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test @@ -5,20 +5,25 @@ // INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) // INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +// INSTR_PROF_RAW_HEADER(uint64_t, NumBitmaskBytes, NumBitmaskBytes) // INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) +// INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin) // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) RUN: printf '\201rforpl\377' > %t.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\2\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\6\0\1\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\6\0\2\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw @@ -38,10 +43,12 @@ RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw // Octal 
'\11' is 9 in decimal: this should push CounterOffset to 1. As there are two counters, // the profile reader should error out. RUN: printf '\11\0\6\0\1\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw // Counter Section diff --git a/llvm/test/tools/llvm-profdata/mcdc-bitmap.test b/llvm/test/tools/llvm-profdata/mcdc-bitmap.test new file mode 100644 index 00000000000000..a7b1b5df8c306c --- /dev/null +++ b/llvm/test/tools/llvm-profdata/mcdc-bitmap.test @@ -0,0 +1,201 @@ +# Test MC/DC bitmap reading and merging. + +# Merge as profdata. +RUN: split-file %s %t +RUN: llvm-profdata merge %t/mcdc-1.proftext %t/mcdc-2.proftext -o %t.profdata +RUN: llvm-profdata show %t.profdata --text -all-functions | FileCheck %s --check-prefix=MCDC +# Merge as proftext. +RUN: llvm-profdata merge %t/mcdc-1.proftext %t/mcdc-2.proftext -o %t.proftext +RUN: llvm-profdata show %t.proftext --text -all-functions | FileCheck %s --check-prefix=MCDC + +MCDC: # Num Bitmap Bytes: +MCDC-NEXT: $1 +MCDC-NEXT: # Bitmap Byte Values: +MCDC-NEXT: a +MCDC: # Num Bitmap Bytes: +MCDC-NEXT: $2 +MCDC-NEXT: # Bitmap Byte Values: +MCDC-NEXT: 0x29 +MCDC-NEXT: 0x0 + +# Merge as profdata. +RUN: llvm-profdata merge %t/mcdc-3.proftext %t/mcdc-4.proftext -o %t.profdata +RUN: llvm-profdata show %t.profdata --text -all-functions | FileCheck %s --check-prefix=MCDC2 +# Merge as proftext. +RUN: llvm-profdata merge %t/mcdc-3.proftext %t/mcdc-4.proftext -o %t.proftext +RUN: llvm-profdata show %t.proftext --text -all-functions | FileCheck %s --check-prefix=MCDC2 + +MCDC2: # Num Bitmap Bytes: +MCDC2-NEXT: $8 +MCDC2-NEXT: # Bitmap Byte Values: +MCDC2-NEXT: 0x1 +MCDC2-NEXT: 0x2 +MCDC2-NEXT: 0x3 +MCDC2-NEXT: 0xf +MCDC2-NEXT: 0xf +MCDC2-NEXT: 0xe +MCDC2-NEXT: 0xf +MCDC2-NEXT: 0xa + +# Incompatible size mismatch. +RUN: llvm-profdata merge %t/mcdc-2.proftext %t/mcdc-4.proftext -o %t.profdata 2>&1 | FileCheck %s --check-prefix=MCDC3 +# Merge as proftext +RUN: llvm-profdata merge %t/mcdc-2.proftext %t/mcdc-4.proftext -o %t.proftext 2>&1 | FileCheck %s --check-prefix=MCDC3 + +MCDC3: function bitmap size change detected (bitmap size mismatch) + +# Invalid number of bitmap bytes. +RUN: not llvm-profdata merge %t/mcdc-3.proftext %t/mcdc-err0.proftext -o %t.proftext 2>&1 | FileCheck %s --check-prefix=MCDC4 + +MCDC4: malformed instrumentation profile data: number of bitmap bytes is not a valid integer + +# Invalid bitmap byte. 
+RUN: not llvm-profdata merge %t/mcdc-3.proftext %t/mcdc-err1.proftext -o %t.proftext 2>&1 | FileCheck %s --check-prefix=MCDC5 + +MCDC5: malformed instrumentation profile data: bitmap byte is not a valid integer + +;--- mcdc-1.proftext +main +# Func Hash: +702755447896 +# Num Counters: +4 +# Counter Values: +1 +0 +1 +0 +# Num Bitmask Bytes: +$1 +# Bitmask Byte Values: +2 +;--- mcdc-2.proftext +main +# Func Hash: +702755447896 +# Num Counters: +4 +# Counter Values: +1 +1 +1 +1 +# Num Bitmask Bytes: +$1 +# Bitmask Byte Values: +8 + + +test3 +# Func Hash: +15288018065 +# Num Counters: +6 +# Counter Values: +4 +2 +1 +0 +0 +2 +# Num Bitmask Bytes: +$0x2 +# Bitmask Byte Values: +0x29 +0x0 +;--- mcdc-3.proftext +test3 +# Func Hash: +15288018065 +# Num Counters: +6 +# Counter Values: +4 +2 +1 +0 +0 +2 +# Num Bitmask Bytes: +$8 +# Bitmask Byte Values: +0x0 +0x2 +0x3 +0xf +0xf +0xa +0xc +0x2 +;--- mcdc-4.proftext +test3 +# Func Hash: +15288018065 +# Num Counters: +6 +# Counter Values: +4 +2 +1 +0 +0 +2 +# Num Bitmask Bytes: +$ 8 +# Bitmask Byte Values: +1 +2 +3 +4 +5 +6 +7 +8 +;--- mcdc-err0.proftext +test3 +# Func Hash: +15288018065 +# Num Counters: +6 +# Counter Values: +4 +2 +1 +0 +0 +2 +# Num Bitmask Bytes: +$8.9 +# Bitmask Byte Values: +1 +2 +3 +4 +5 +6 +7 +8 +;--- mcdc-err1.proftext +test3 +# Func Hash: +15288018065 +# Num Counters: +6 +# Counter Values: +4 +2 +1 +0 +0 +2 +# Num Bitmask Bytes: +$8 +# Bitmask Byte Values: +1 +2 +3 +4 +5.4 +6 +7 +8 diff --git a/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test b/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test index 171b5cc60878f4..4a5c42843ff4dd 100644 --- a/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test +++ b/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test @@ -1,5 +1,5 @@ RUN: printf '\201rforpl\377' > %t.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw +RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw // We should fail on this because the binary IDs is not a multiple of 8 bytes. 
RUN: printf '\77\0\0\0\0\0\0\0' >> %t.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw diff --git a/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test b/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test index c0072bcbde1b38..2a92575ee34075 100644 --- a/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test +++ b/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test @@ -8,6 +8,9 @@ RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\3' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\20' >> %t RUN: printf '\0\0\0\1\0\4\0\0' >> %t RUN: printf '\0\0\0\2\0\4\0\0' >> %t diff --git a/llvm/test/tools/llvm-profdata/raw-32-bits-be.test b/llvm/test/tools/llvm-profdata/raw-32-bits-be.test index c8e862009ef028..fbd31d044382a1 100644 --- a/llvm/test/tools/llvm-profdata/raw-32-bits-be.test +++ b/llvm/test/tools/llvm-profdata/raw-32-bits-be.test @@ -1,37 +1,46 @@ RUN: printf '\377lprofR\201' > %t -RUN: printf '\0\0\0\0\0\0\0\10' >> %t +RUN: printf '\0\0\0\0\0\0\0\11' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\3' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\4' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\20' >> %t RUN: printf '\0\0\0\0\1\0\0\0' >> %t +RUN: printf '\0\0\0\0\3\0\0\0' >> %t RUN: printf '\0\0\0\0\2\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\134\370\302\114\333\030\275\254' >> %t RUN: printf '\0\0\0\0\0\0\0\1' >> %t RUN: printf '\1\0\0\0' >> %t +RUN: printf '\3\0\0\0' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\0\0\0\1' >> %t -RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\3' >> %t +RUN: printf '\0\0\0\0' >> %t RUN: printf '\344\023\165\112\031\035\265\067' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t -RUN: printf '\0\xff\xff\xe0' >> %t +RUN: printf '\0\xff\xff\xd8' >> %t +RUN: printf '\2\xff\xff\xd3' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\0\0\0\2' >> %t -RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\1' >> %t +RUN: printf '\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\023' >> %t RUN: printf '\0\0\0\0\0\0\0\067' >> %t RUN: printf '\0\0\0\0\0\0\0\101' >> %t +RUN: printf '\125\125\125\052' >> %t RUN: printf '\7\0foo\1bar\0\0\0\0\0\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s +RUN: llvm-profdata show %t -all-functions -text | FileCheck %s -check-prefix=MCDC CHECK: Counters: CHECK: foo: @@ -48,3 +57,14 @@ CHECK: Functions shown: 2 CHECK: Total functions: 2 CHECK: Maximum function count: 55 CHECK: Maximum internal block count: 65 + +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $3 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $1 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 0x2a diff --git a/llvm/test/tools/llvm-profdata/raw-32-bits-le.test b/llvm/test/tools/llvm-profdata/raw-32-bits-le.test index 523ff1ceb4807a..bb899c5fdb5555 100644 --- a/llvm/test/tools/llvm-profdata/raw-32-bits-le.test +++ b/llvm/test/tools/llvm-profdata/raw-32-bits-le.test @@ -1,37 +1,46 @@ RUN: printf '\201Rforpl\377' > %t -RUN: printf '\10\0\0\0\0\0\0\0' >> %t +RUN: printf '\11\0\0\0\0\0\0\0' >> %t RUN: printf 
'\0\0\0\0\0\0\0\0' >> %t RUN: printf '\2\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\4\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\20\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\1\0\0\0\0' >> %t +RUN: printf '\0\0\0\3\0\0\0\0' >> %t RUN: printf '\0\0\0\2\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\254\275\030\333\114\302\370\134' >> %t RUN: printf '\1\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\1' >> %t +RUN: printf '\0\0\0\3' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\1\0\0\0' >> %t -RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\3\0\0\0' >> %t +RUN: printf '\0\0\0\0' >> %t RUN: printf '\067\265\035\031\112\165\023\344' >> %t RUN: printf '\02\0\0\0\0\0\0\0' >> %t -RUN: printf '\xe0\xff\xff\0' >> %t +RUN: printf '\xd8\xff\xff\0' >> %t +RUN: printf '\xd3\xff\xff\2' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\0\0\0\0' >> %t RUN: printf '\2\0\0\0' >> %t -RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\1\0\0\0' >> %t +RUN: printf '\0\0\0\0' >> %t RUN: printf '\023\0\0\0\0\0\0\0' >> %t RUN: printf '\067\0\0\0\0\0\0\0' >> %t RUN: printf '\101\0\0\0\0\0\0\0' >> %t +RUN: printf '\125\125\125\052' >> %t RUN: printf '\7\0foo\1bar\0\0\0\0\0\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s +RUN: llvm-profdata show %t -all-functions -text | FileCheck %s -check-prefix=MCDC CHECK: Counters: CHECK: foo: @@ -48,3 +57,14 @@ CHECK: Functions shown: 2 CHECK: Total functions: 2 CHECK: Maximum function count: 55 CHECK: Maximum internal block count: 65 + +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $3 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $1 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 0x2a diff --git a/llvm/test/tools/llvm-profdata/raw-64-bits-be.test b/llvm/test/tools/llvm-profdata/raw-64-bits-be.test index b2b8b31dafbf5a..8fcadb6a0dd28a 100644 --- a/llvm/test/tools/llvm-profdata/raw-64-bits-be.test +++ b/llvm/test/tools/llvm-profdata/raw-64-bits-be.test @@ -1,35 +1,44 @@ RUN: printf '\377lprofr\201' > %t -RUN: printf '\0\0\0\0\0\0\0\10' >> %t +RUN: printf '\0\0\0\0\0\0\0\11' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\3' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\4' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\20' >> %t RUN: printf '\0\0\0\1\0\4\0\0' >> %t +RUN: printf '\0\0\0\3\0\4\0\0' >> %t RUN: printf '\0\0\0\2\0\4\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\134\370\302\114\333\030\275\254' >> %t RUN: printf '\0\0\0\0\0\0\0\1' >> %t RUN: printf '\0\0\0\1\0\4\0\0' >> %t +RUN: printf '\0\0\0\3\0\4\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\1\0\0\0\0' >> %t +RUN: printf '\0\0\0\3\0\0\0\0' >> %t RUN: printf '\344\023\165\112\031\035\265\067' >> %t RUN: printf '\0\0\0\0\0\0\0\02' >> %t -RUN: printf '\0\0\0\1\0\3\xff\xd8' >> %t +RUN: printf '\0\0\0\1\0\3\xff\xc8' >> %t +RUN: printf '\0\0\0\3\0\3\xff\xc3' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\02\0\0\0\0' >> %t +RUN: printf '\0\0\0\1\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\023' >> %t RUN: printf '\0\0\0\0\0\0\0\067' >> %t RUN: printf '\0\0\0\0\0\0\0\101' >> %t +RUN: 
printf '\125\125\125\052' >> %t RUN: printf '\7\0foo\1bar\0\0\0\0\0\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s +RUN: llvm-profdata show %t -all-functions -text | FileCheck %s -check-prefix=MCDC CHECK: Counters: CHECK: foo: @@ -46,3 +55,14 @@ CHECK: Functions shown: 2 CHECK: Total functions: 2 CHECK: Maximum function count: 55 CHECK: Maximum internal block count: 65 + +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $3 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $1 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 0x2a diff --git a/llvm/test/tools/llvm-profdata/raw-64-bits-le.test b/llvm/test/tools/llvm-profdata/raw-64-bits-le.test index 4e95798bc0afbd..0aa8b38f692672 100644 --- a/llvm/test/tools/llvm-profdata/raw-64-bits-le.test +++ b/llvm/test/tools/llvm-profdata/raw-64-bits-le.test @@ -1,35 +1,44 @@ RUN: printf '\201rforpl\377' > %t -RUN: printf '\10\0\0\0\0\0\0\0' >> %t +RUN: printf '\11\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\2\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\4\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\20\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\4\0\1\0\0\0' >> %t +RUN: printf '\0\0\4\0\3\0\0\0' >> %t RUN: printf '\0\0\4\0\2\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\254\275\030\333\114\302\370\134' >> %t RUN: printf '\1\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\4\0\1\0\0\0' >> %t +RUN: printf '\0\0\4\0\3\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\1\0\0\0\0\0\0\0' >> %t +RUN: printf '\3\0\0\0\0\0\0\0' >> %t RUN: printf '\067\265\035\031\112\165\023\344' >> %t RUN: printf '\02\0\0\0\0\0\0\0' >> %t -RUN: printf '\xd8\xff\3\0\1\0\0\0' >> %t +RUN: printf '\xc8\xff\3\0\1\0\0\0' >> %t +RUN: printf '\xc3\xff\3\0\3\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\02\0\0\0\0\0\0\0' >> %t +RUN: printf '\1\0\0\0\0\0\0\0' >> %t RUN: printf '\023\0\0\0\0\0\0\0' >> %t RUN: printf '\067\0\0\0\0\0\0\0' >> %t RUN: printf '\101\0\0\0\0\0\0\0' >> %t +RUN: printf '\125\125\125\052' >> %t RUN: printf '\7\0foo\1bar\0\0\0\0\0\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s +RUN: llvm-profdata show %t -all-functions -text | FileCheck %s -check-prefix=MCDC CHECK: Counters: CHECK: foo: @@ -46,3 +55,14 @@ CHECK: Functions shown: 2 CHECK: Total functions: 2 CHECK: Maximum function count: 55 CHECK: Maximum internal block count: 65 + +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $3 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC-NEXT: 55 +MCDC: Num Bitmap Bytes: +MCDC-NEXT: $1 +MCDC-NEXT: Bitmap Byte Values: +MCDC-NEXT: 0x2a diff --git a/llvm/test/tools/llvm-profdata/raw-two-profiles.test b/llvm/test/tools/llvm-profdata/raw-two-profiles.test index 8d46c91e2732cd..f4a9aa8e1bbc3a 100644 --- a/llvm/test/tools/llvm-profdata/raw-two-profiles.test +++ b/llvm/test/tools/llvm-profdata/raw-two-profiles.test @@ -1,12 +1,15 @@ RUN: printf '\201rforpl\377' > %t-foo.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\11\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> 
%t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\10\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\2\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw @@ -15,20 +18,25 @@ RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\023\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\3\0foo\0\0\0' >> %t-foo.profraw RUN: printf '\201rforpl\377' > %t-bar.profraw -RUN: printf '\10\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\11\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\2\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\10\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\2\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw @@ -37,7 +45,9 @@ RUN: printf '\02\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\02\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\067\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\101\0\0\0\0\0\0\0' >> %t-bar.profraw From 7de70e0f724f7d0aec7ab0f78c648982989efc5b Mon Sep 17 00:00:00 2001 From: Z572 <37945516+Z572@users.noreply.github.com> Date: Tue, 31 Oct 2023 00:20:00 +0800 Subject: [PATCH 039/144] [Flang][OpenMP] Fix comments that should not be Sentinels on fixed format. 
(#68911)

Fixes #68653

---
 flang/lib/Parser/prescan.cpp         | 16 +++++------
 flang/test/Parser/OpenMP/sentinels.f | 42 ++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 9 deletions(-)
 create mode 100644 flang/test/Parser/OpenMP/sentinels.f

diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index 2f25b02bf7a323..f61eff5b0dd6aa 100644
--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -1177,16 +1177,14 @@ Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const {
   char sentinel[5], *sp{sentinel};
   int column{2};
   for (; column < 6; ++column, ++p) {
-    if (*p != ' ') {
-      if (*p == '\n' || *p == '\t') {
-        break;
-      }
-      if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) {
-        // OpenMP conditional compilation line: leave the label alone
-        break;
-      }
-      *sp++ = ToLowerCaseLetter(*p);
+    if (*p == ' ' || *p == '\n' || *p == '\t') {
+      break;
+    }
+    if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) {
+      // OpenMP conditional compilation line: leave the label alone
+      break;
     }
+    *sp++ = ToLowerCaseLetter(*p);
   }
   if (column == 6) {
     if (*p == ' ' || *p == '\t' || *p == '0') {
diff --git a/flang/test/Parser/OpenMP/sentinels.f b/flang/test/Parser/OpenMP/sentinels.f
new file mode 100644
index 00000000000000..98d4bad19f6a27
--- /dev/null
+++ b/flang/test/Parser/OpenMP/sentinels.f
@@ -0,0 +1,42 @@
+! RUN: %flang_fc1 -fopenmp -E %s | FileCheck %s
+! CHECK: program main
+! CHECK: interface
+! CHECK: subroutine sub(a, b)
+! CHECK:!dir$ ignore_tkr a
+! CHECK:!dir$ ignore_tkr b
+! CHECK: real(4):: a, b
+! CHECK: end subroutine
+! CHECK: end interface
+! CHECK: PRINT *, "Is ' '"
+! CHECK: 123 PRINT *, "Is '123 '"
+
+!@cuf subroutine atcuf;
+      program main
+      interface
+      subroutine sub(a, b)
+!dir$ ignore_tkr a
+!dir$ ignore_tkr
+!dir$+ b
+      real(4):: a, b
+      end subroutine
+      end interface
+!
+! comment line
+!@fp PRINT *, "This is a comment line"
+!@f p PRINT *, "This is a comment line"
+!$ PRINT *, "Is ' '"
+!$123 PRINT *, "Is '123 '"
+!$ABC PRINT *, "Is 'ABC '"
+! $ PRINT *, "This is a comment line 6"
+c $This is a comment line
+!0$110This is a comment line
+
+! $ This is a comment line
+! $ 0This is a comment line
+! &This is a comment line
+! $ This is a comment line
+!
$ This is a comment line +C $ This is a comment line +c $ his is a comment line +* $ This is a comment line + end From 8a1719d3edbb04ac6a20062911d59d38aec3b2ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Mon, 30 Oct 2023 16:16:53 +0100 Subject: [PATCH 040/144] [clang][Interp][NFC] Use delegate() in VisitCXXBindTemporaryExpr --- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 195af664c13daf..485893d58f487a 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -1314,9 +1314,7 @@ bool ByteCodeExprGen::VisitMaterializeTemporaryExpr( template bool ByteCodeExprGen::VisitCXXBindTemporaryExpr( const CXXBindTemporaryExpr *E) { - if (Initializing) - return this->visitInitializer(E->getSubExpr()); - return this->visit(E->getSubExpr()); + return this->delegate(E->getSubExpr()); } template From f75370310c097cde2fce5d980398ecef8f48b633 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 30 Oct 2023 09:24:18 -0700 Subject: [PATCH 041/144] [X86] Print 'l' section flag for SHF_X86_64_LARGE (#70380) When directly compiling to an object file we properly set the section flag, but not when emitting assembly. --- llvm/lib/MC/MCSectionELF.cpp | 3 +++ llvm/test/MC/ELF/section.s | 2 ++ 2 files changed, 5 insertions(+) diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp index 666252ffcb74eb..95fdf33522076e 100644 --- a/llvm/lib/MC/MCSectionELF.cpp +++ b/llvm/lib/MC/MCSectionELF.cpp @@ -123,6 +123,9 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, } else if (Arch == Triple::hexagon) { if (Flags & ELF::SHF_HEX_GPREL) OS << 's'; + } else if (Arch == Triple::x86_64) { + if (Flags & ELF::SHF_X86_64_LARGE) + OS << 'l'; } OS << '"'; diff --git a/llvm/test/MC/ELF/section.s b/llvm/test/MC/ELF/section.s index 3a6d046821f4ae..8c625256a2761d 100644 --- a/llvm/test/MC/ELF/section.s +++ b/llvm/test/MC/ELF/section.s @@ -269,6 +269,8 @@ bar: // CHECK-NEXT: ] .section .large,"l" +// ASM: .section .large,"l" + // CHECK: Section { // CHECK: Name: .large // CHECK-NEXT: Type: SHT_PROGBITS From 3c5885535a82fc5266450620da8c2b880ae9b497 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 30 Oct 2023 12:28:51 -0400 Subject: [PATCH 042/144] [libc++][tests] Fix a few remaining instances of outdated static assertion regexes in our test suite (#70454) This is a re-application of 166b3a86173, which was reverted in fde1ecdec878b because it broke some tests. 
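A minimal sketch of the convention these tests now rely on, written as a
hypothetical stand-alone clang -verify file (the file name and RUN line are
illustrative assumptions, not part of libc++ or of this patch):

  // static-assert-wording.verify.cpp (hypothetical example)
  // RUN: %clang_cc1 -std=c++17 -verify %s

  // Clang now emits one uniform wording for failed static_asserts, so a
  // single literal match replaces the old "static assertion|static_assert"
  // alternation regex.
  static_assert(false, "example"); // expected-error {{static assertion failed: example}}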
--- .../cpp17_iterator_concepts.verify.cpp | 86 +++++++++---------- .../mdspan/layout_stride/extents.verify.cpp | 4 +- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/libcxx/test/libcxx/algorithms/cpp17_iterator_concepts.verify.cpp b/libcxx/test/libcxx/algorithms/cpp17_iterator_concepts.verify.cpp index 3bc49cd3b1b183..344543d5f19ffe 100644 --- a/libcxx/test/libcxx/algorithms/cpp17_iterator_concepts.verify.cpp +++ b/libcxx/test/libcxx/algorithms/cpp17_iterator_concepts.verify.cpp @@ -83,26 +83,26 @@ struct diff_t_not_signed : valid_iterator { }; void check_iterator_requirements() { - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error {{static assertion failed}} // expected-note@*:* {{indirection requires pointer operand}} - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error {{static assertion failed}} // expected-note@*:* {{cannot increment value of type 'missing_preincrement'}} - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error {{static assertion failed}} // expected-note@*:* {{because 'not_move_constructible' does not satisfy '__cpp17_move_constructible'}} - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error {{static assertion failed}} // expected-note@*:* {{because 'not_copy_constructible' does not satisfy '__cpp17_copy_constructible'}} - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error {{static assertion failed}} // expected-note@*:* {{because 'not_move_assignable' does not satisfy '__cpp17_copy_assignable'}} - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error {{static assertion failed}} // expectted-note@*:* {{because 'not_copy_assignable' does not satisfy '__cpp17_copy_assignable'}} - static_assert(std::__cpp17_iterator); // expected-error-re {{{{static assertion|static_assert}} failed}} + static_assert(std::__cpp17_iterator); // expected-error {{static assertion failed}} // expectted-note@*:* {{'is_signed_v<__iter_diff_t >' evaluated to false}} } @@ -115,10 +115,10 @@ bool operator==(not_unequality_comparable, not_unequality_comparable); bool operator!=(not_unequality_comparable, not_unequality_comparable) = delete; void check_input_iterator_requirements() { - _LIBCPP_REQUIRE_CPP17_INPUT_ITERATOR(not_equality_comparable); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_INPUT_ITERATOR(not_equality_comparable); // expected-error {{static assertion failed}} // expected-note@*:* {{'__lhs == __rhs' would be invalid: overload resolution selected deleted operator '=='}} - _LIBCPP_REQUIRE_CPP17_INPUT_ITERATOR(not_unequality_comparable); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_INPUT_ITERATOR(not_unequality_comparable); // expected-error {{static assertion failed}} // expected-note@*:* {{'__lhs != __rhs' would be invalid: overload resolution selected deleted operator '!='}} } @@ -138,9 +138,9 @@ struct 
postincrement_not_ref : valid_iterator {}; bool operator==(postincrement_not_ref, postincrement_not_ref); void check_forward_iterator_requirements() { - _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(not_default_constructible); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(not_default_constructible); // expected-error {{static assertion failed}} // expected-note@*:* {{because 'not_default_constructible' does not satisfy '__cpp17_default_constructible'}} - _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(postincrement_not_ref); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(postincrement_not_ref); // expected-error {{static assertion failed}} #ifndef _AIX // expected-note@*:* {{because type constraint 'convertible_to::Proxy, const postincrement_not_ref &>' was not satisfied}} #endif @@ -167,11 +167,11 @@ struct not_returning_iter_reference : valid_forward_iterator >' was not satisfied}} } @@ -359,62 +359,62 @@ struct missing_const_const_greater_eq : valid_random_access_iterator __iter' would be invalid: overload resolution selected deleted operator '>'}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_mut_greater); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_mut_greater); // expected-error {{static assertion failed}} // expected-note@*:* {{because 'std::as_const(__iter) > __iter' would be invalid: overload resolution selected deleted operator '>'}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_mut_const_greater); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_mut_const_greater); // expected-error {{static assertion failed}} // expected-note@*:* {{because '__iter > std::as_const(__iter)' would be invalid: overload resolution selected deleted operator '>'}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_const_greater); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_const_greater); // expected-error {{static assertion failed}} // expected-note@*:* {{because 'std::as_const(__iter) > std::as_const(__iter)' would be invalid: overload resolution selected deleted operator '>'}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_less_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_less_eq); // expected-error {{static assertion failed}} // expected-note@*:* {{because '__iter <= __iter' would be invalid: overload resolution selected deleted operator '<='}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_mut_less_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_mut_less_eq); // expected-error {{static assertion failed}} // expected-note@*:* {{because 'std::as_const(__iter) <= __iter' would be invalid: overload resolution selected deleted operator '<='}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_mut_const_less_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_mut_const_less_eq); // expected-error {{static assertion failed}} // expected-note@*:* {{because '__iter <= std::as_const(__iter)' would be invalid: overload resolution selected deleted operator '<='}} - 
_LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_const_less_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_const_less_eq); // expected-error {{static assertion failed}} // expected-note@*:* {{because 'std::as_const(__iter) <= std::as_const(__iter)' would be invalid: overload resolution selected deleted operator '<='}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_greater_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_greater_eq); // expected-error {{static assertion failed}} // expected-note@*:* {{because '__iter >= __iter' would be invalid: overload resolution selected deleted operator '>='}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_mut_greater_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_mut_greater_eq); // expected-error {{static assertion failed}} // expected-note@*:* {{because 'std::as_const(__iter) >= __iter' would be invalid: overload resolution selected deleted operator '>='}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_mut_const_greater_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_mut_const_greater_eq); // expected-error {{static assertion failed}} // expected-note@*:* {{because '__iter >= std::as_const(__iter)' would be invalid: overload resolution selected deleted operator '>='}} - _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_const_greater_eq); // expected-error-re {{{{static assertion|static_assert}} failed}} + _LIBCPP_REQUIRE_CPP17_RANDOM_ACCESS_ITERATOR(missing_const_const_greater_eq); // expected-error {{static assertion failed}} // expected-note@*:* {{because 'std::as_const(__iter) >= std::as_const(__iter)' would be invalid: overload resolution selected deleted operator '>='}} } diff --git a/libcxx/test/std/containers/views/mdspan/layout_stride/extents.verify.cpp b/libcxx/test/std/containers/views/mdspan/layout_stride/extents.verify.cpp index 4742527f7af11e..46f2b774bcbd9f 100644 --- a/libcxx/test/std/containers/views/mdspan/layout_stride/extents.verify.cpp +++ b/libcxx/test/std/containers/views/mdspan/layout_stride/extents.verify.cpp @@ -23,11 +23,11 @@ #include void not_extents() { - // expected-error-re@*:* {{{{(static_assert|static assertion)}} failed {{.*}}layout_stride::mapping template argument must be a specialization of extents}} + // expected-error-re@*:* {{static assertion failed {{.*}}layout_stride::mapping template argument must be a specialization of extents}} [[maybe_unused]] std::layout_stride::mapping mapping; } void representable() { - // expected-error-re@*:* {{{{(static_assert|static assertion)}} failed {{.*}}layout_stride::mapping product of static extents must be representable as index_type.}} + // expected-error-re@*:* {{static assertion failed {{.*}}layout_stride::mapping product of static extents must be representable as index_type.}} [[maybe_unused]] std::layout_stride::mapping> mapping; } From 101008be830bb475f717a388c69cea1f48677baf Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 30 Oct 2023 16:44:22 +0000 Subject: [PATCH 043/144] [AMDGPU] CodeGen for 64-bit buffer atomic cmpswap intrinsics (#70475) Implement codegen for: llvm.amdgcn.raw.buffer.atomic.cmpswap.i64 llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64 
llvm.amdgcn.struct.buffer.atomic.cmpswap.i64 llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64 --- llvm/lib/Target/AMDGPU/BUFInstructions.td | 51 ++-- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 5 +- .../llvm.amdgcn.raw.buffer.atomic.cmpswap.ll | 242 ++++++++++++++++- ...vm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll | 242 ++++++++++++++++- ...lvm.amdgcn.struct.buffer.atomic.cmpswap.ll | 247 +++++++++++++++++- ...amdgcn.struct.ptr.buffer.atomic.cmpswap.ll | 247 +++++++++++++++++- .../AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll | 22 ++ .../llvm.amdgcn.raw.ptr.buffer.atomic.ll | 22 ++ .../llvm.amdgcn.struct.buffer.atomic.ll | 28 ++ .../llvm.amdgcn.struct.ptr.buffer.atomic.ll | 28 ++ 10 files changed, 1070 insertions(+), 64 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 897bbfa5c58bee..31fdd2c8e2b361 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1659,73 +1659,76 @@ let SubtargetPredicate = isGFX90APlus in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">; } // End SubtargetPredicate = isGFX90APlus -multiclass SIBufferAtomicCmpSwapPat { - +multiclass SIBufferAtomicCmpSwapPat { foreach RtnMode = ["ret", "noret"] in { - defvar Op = !cast(SIbuffer_atomic_cmpswap # !if(!eq(RtnMode, "ret"), "", "_noret")); defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); defvar CachePolicy = !if(!eq(RtnMode, "ret"), (set_glc $cachepolicy), (timm:$cachepolicy)); + defvar SrcRC = getVregSrcForVT.ret; + defvar DataRC = getVregSrcForVT.ret; + defvar SubLo = !if(!eq(vt, i32), sub0, sub0_sub1); + defvar SubHi = !if(!eq(vt, i32), sub1, sub2_sub3); defvar OffsetResDag = (!cast(Inst # "_OFFSET" # InstSuffix) - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + (REG_SEQUENCE DataRC, SrcRC:$data, SubLo, SrcRC:$cmp, SubHi), SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy); def : GCNPat< - (Op - i32:$data, i32:$cmp, v4i32:$rsrc, 0, 0, i32:$soffset, - timm:$offset, timm:$cachepolicy, 0), + (vt (Op + vt:$data, vt:$cmp, v4i32:$rsrc, 0, 0, i32:$soffset, + timm:$offset, timm:$cachepolicy, 0)), !if(!eq(RtnMode, "ret"), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffsetResDag, VReg_64)), sub0), + (EXTRACT_SUBREG OffsetResDag, SubLo), OffsetResDag) >; defvar IdxenResDag = (!cast(Inst # "_IDXEN" # InstSuffix) - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + (REG_SEQUENCE DataRC, SrcRC:$data, SubLo, SrcRC:$cmp, SubHi), VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy); def : GCNPat< - (Op - i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, + (vt (Op + vt:$data, vt:$cmp, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, - timm:$cachepolicy, timm), + timm:$cachepolicy, timm)), !if(!eq(RtnMode, "ret"), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS IdxenResDag, VReg_64)), sub0), + (EXTRACT_SUBREG IdxenResDag, SubLo), IdxenResDag) >; defvar OffenResDag = (!cast(Inst # "_OFFEN" # InstSuffix) - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + (REG_SEQUENCE DataRC, SrcRC:$data, SubLo, SrcRC:$cmp, SubHi), VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy); def : GCNPat< - (Op - i32:$data, i32:$cmp, v4i32:$rsrc, 0, + (vt (Op + vt:$data, vt:$cmp, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, - timm:$cachepolicy, 0), + timm:$cachepolicy, 0)), !if(!eq(RtnMode, "ret"), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffenResDag, VReg_64)), sub0), + (EXTRACT_SUBREG 
OffenResDag, SubLo), OffenResDag) >; defvar BothenResDag = (!cast(Inst # "_BOTHEN" # InstSuffix) - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + (REG_SEQUENCE DataRC, SrcRC:$data, SubLo, SrcRC:$cmp, SubHi), (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy); def : GCNPat< - (Op - i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, + (vt (Op + vt:$data, vt:$cmp, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, - timm:$cachepolicy, timm), + timm:$cachepolicy, timm)), !if(!eq(RtnMode, "ret"), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS BothenResDag, VReg_64)), sub0), + (EXTRACT_SUBREG BothenResDag, SubLo), BothenResDag) >; } // end foreach RtnMode } -defm : SIBufferAtomicCmpSwapPat<"BUFFER_ATOMIC_CMPSWAP">; +defm : SIBufferAtomicCmpSwapPat; +defm : SIBufferAtomicCmpSwapPat; class MUBUFLoad_PatternADDR64 : GCNPat < diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index b0b91d83171880..b0493edfa335ac 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -199,10 +199,7 @@ defm SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">; def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP", SDTypeProfile<1, 9, - [SDTCisVT<0, i32>, // dst - SDTCisVT<1, i32>, // src - SDTCisVT<2, i32>, // cmp - SDTCisVT<3, v4i32>, // rsrc + [SDTCisVT<3, v4i32>, // rsrc SDTCisVT<4, i32>, // vindex(VGPR) SDTCisVT<5, i32>, // voffset(VGPR) SDTCisVT<6, i32>, // soffset(SGPR) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll index d3477accf83d32..a78199bcebd2ee 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll @@ -196,21 +196,235 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_ ret float %cast } +; Natural mapping +define amdgpu_ps double @raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: 
[[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + %cast = bitcast i64 %ret to double + ret double %cast +} -; FIXME: 64-bit not handled -; ; Natural mapping -; define amdgpu_ps <2 x float> @raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { -; %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) -; %cast = bitcast i64 %ret to <2 x float> -; ret <2 x float> %cast -; } +; Natural mapping +define amdgpu_ps void @raw_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; CHECK-NEXT: S_ENDPGM 0 + %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> 
%rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +; All operands need regbank legalization +define amdgpu_ps double @raw_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i64 inreg %val, i64 inreg %cmp, <4 x i32> %rsrc, i32 inreg %voffset, i32 %soffset) { + ; CHECK-LABEL: name: raw_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY9]], implicit $exec + ; CHECK-NEXT: 
[[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub0 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec + ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_5]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec + ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_6]] + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + %cast = bitcast i64 %ret to double + ret double %cast +} -; define amdgpu_ps void @raw_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { -; %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) -; ret void -; } +; All operands need regbank legalization +define amdgpu_ps void @raw_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i64 inreg %val, i64 inreg %cmp, <4 x i32> %rsrc, i32 inreg %voffset, i32 %soffset) { + ; CHECK-LABEL: name: raw_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: S_ENDPGM 0 + %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + 
ret void +} -declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32 immarg) #0 -declare i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32 immarg) #0 +define amdgpu_ps double @raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095(i64 %val, i64 %cmp, <4 x i32> inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %voffset = add i32 %voffset.base, 4095 + %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + %cast = bitcast i64 %ret to double + ret double %cast +} -attributes #0 = { nounwind } +declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32 immarg) +declare i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll index e3efe0e3ae41c7..56b2d0452dd45f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll @@ 
-196,21 +196,235 @@ define amdgpu_ps float @raw_ptr_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__s ret float %cast } +; Natural mapping +define amdgpu_ps double @raw_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) + %cast = bitcast i64 %ret to double + ret double %cast +} -; FIXME: 64-bit not handled -; ; Natural mapping -; define amdgpu_ps <2 x float> @raw_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { -; %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) -; %cast = bitcast i64 %ret to <2 x float> -; ret <2 x float> %cast -; } +; Natural mapping +define amdgpu_ps void @raw_ptr_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: 
raw_ptr_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: S_ENDPGM 0 + %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +; All operands need regbank legalization +define amdgpu_ps double @raw_ptr_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i64 inreg %val, i64 inreg %cmp, ptr addrspace(8) %rsrc, i32 inreg %voffset, i32 %soffset) { + ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; CHECK-NEXT: {{ $}} 
+ ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub0 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec + ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_5]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec + ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_6]] + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 
%val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) + %cast = bitcast i64 %ret to double + ret double %cast +} -; define amdgpu_ps void @raw_ptr_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { -; %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) -; ret void -; } +; All operands need regbank legalization +define amdgpu_ps void @raw_ptr_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i64 inreg %val, i64 inreg %cmp, ptr addrspace(8) %rsrc, i32 inreg %voffset, i32 %soffset) { + ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY9]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: S_ENDPGM 0 + %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} -declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32 immarg) #0 -declare i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32 immarg) #0 +define amdgpu_ps double @raw_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095(i64 %val, i64 %cmp, ptr addrspace(8) inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE 
[[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %voffset = add i32 %voffset.base, 4095 + %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) + %cast = bitcast i64 %ret to double + ret double %cast +} -attributes #0 = { nounwind } +declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32 immarg) +declare i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll index d9e35ff729debf..f4ca44be373fa5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s - ; Natural mapping define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i32 %val, i32 %cmp, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: name: struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -209,7 +208,247 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg ret float %cast } -declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32 immarg) #0 -declare i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32, i32 immarg) #0 +; Natural mapping +define amdgpu_ps double @struct_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: struct_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
$vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub0 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %ret = call i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + %cast = bitcast i64 %ret to double + ret double %cast +} + +; Natural mapping +define amdgpu_ps void @struct_buffer_atomic_cmpswap_noret_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: struct_buffer_atomic_cmpswap_noret_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = 
COPY $vgpr5 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; CHECK-NEXT: S_ENDPGM 0 + %ret = call i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +; All operands need legalization +define amdgpu_ps double @struct_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i64 inreg %val, i64 inreg %cmp, <4 x i32> %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 %soffset) { + ; CHECK-LABEL: name: struct_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr7 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY11]], %subreg.sub0_sub1, [[COPY12]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN]].sub0_sub1 + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY19]].sub0 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY19]].sub1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec + ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_5]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec + ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_6]] + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %ret = call i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + %cast = bitcast i64 %ret to double + ret double %cast +} + +; All operands need legalization +define amdgpu_ps void @struct_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i64 inreg %val, i64 inreg %cmp, <4 x i32> %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 %soffset) { + ; CHECK-LABEL: name: struct_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr7 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1 + ; 
CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY11]], %subreg.sub0_sub1, [[COPY12]], %subreg.sub2_sub3 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: S_ENDPGM 0 + %ret = call i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps double @struct_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add4095(i64 %val, i64 %cmp, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset.base, i32 inreg %soffset) { + ; CHECK-LABEL: name: struct_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add4095 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub0 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; CHECK-NEXT: SI_RETURN_TO_EPILOG 
implicit $sgpr0, implicit $sgpr1 + %voffset = add i32 %voffset.base, 4095 + %ret = call i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + %cast = bitcast i64 %ret to double + ret double %cast +} -attributes #0 = { nounwind } +declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32 immarg) +declare i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll index 662643c3c680de..e8e6cab4edbe89 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s - ; Natural mapping define amdgpu_ps float @struct_ptr_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i32 %val, i32 %cmp, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: name: struct_ptr_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -209,7 +208,247 @@ define amdgpu_ps float @struct_ptr_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp ret float %cast } -declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32, i32 immarg) #0 -declare i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32, i32 immarg) #0 +; Natural mapping +define amdgpu_ps double @struct_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: struct_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; CHECK-NEXT: 
[[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub0 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %ret = call i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + %cast = bitcast i64 %ret to double + ret double %cast +} + +; Natural mapping +define amdgpu_ps void @struct_ptr_buffer_atomic_cmpswap_noret_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: struct_ptr_buffer_atomic_cmpswap_noret_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: S_ENDPGM 0 + %ret = call i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +; All operands need legalization +define amdgpu_ps double 
@struct_ptr_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i64 inreg %val, i64 inreg %cmp, ptr addrspace(8) %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 %soffset) { + ; CHECK-LABEL: name: struct_ptr_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr7 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit 
$exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY11]], %subreg.sub0_sub1, [[COPY12]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN]].sub0_sub1 + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY19]].sub0 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY19]].sub1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec + ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_5]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec + ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_6]] + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %ret = call i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + %cast = bitcast i64 %ret to double + ret double %cast +} + +; All operands need legalization +define amdgpu_ps void @struct_ptr_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i64 inreg %val, i64 inreg %cmp, ptr addrspace(8) %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 %soffset) { + ; CHECK-LABEL: name: struct_ptr_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = 
COPY $sgpr7 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY11]], %subreg.sub0_sub1, [[COPY12]], %subreg.sub2_sub3 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: S_ENDPGM 0 + %ret = call i64 
@llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps double @struct_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add4095(i64 %val, i64 %cmp, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset.base, i32 inreg %soffset) { + ; CHECK-LABEL: name: struct_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add4095 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY10]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_BOTHEN_RTN]].sub0_sub1 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub0 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY11]].sub1 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %voffset = add i32 %voffset.base, 4095 + %ret = call i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + %cast = bitcast i64 %ret to double + ret double %cast +} -attributes #0 = { nounwind } +declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32, i32 immarg) +declare i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll index 
1e76044d935c26..a197d4802188a0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll @@ -108,6 +108,27 @@ main_body: ret float %v.float } +;CHECK-LABEL: {{^}}test5: +;CHECK-NOT: s_waitcnt +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 glc +;CHECK-DAG: s_waitcnt vmcnt(0) +;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, v4, s[0:3], 0 offen glc +;CHECK: s_waitcnt vmcnt(0) +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, v4, s[0:3], 0 offen offset:44 glc +;CHECK-DAG: s_waitcnt vmcnt(0) +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:4 glc +define amdgpu_ps float @test5(<4 x i32> inreg %rsrc, i64 %data, i64 %cmp, i32 %vindex, i32 %voffset) { +main_body: + %o1 = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %data, i64 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0) + %o3 = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %o1, i64 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0) + %ofs.5 = add i32 %voffset, 44 + %o5 = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %o3, i64 %cmp, <4 x i32> %rsrc, i32 %ofs.5, i32 0, i32 0) + %o6 = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %o5, i64 %cmp, <4 x i32> %rsrc, i32 4, i32 8188, i32 0) + %out = sitofp i64 %o6 to float + ret float %out +} + declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32) #0 declare float @llvm.amdgcn.raw.buffer.atomic.swap.f32(float, <4 x i32>, i32, i32, i32) #0 declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #0 @@ -122,5 +143,6 @@ declare i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32 declare i32 @llvm.amdgcn.raw.buffer.atomic.inc.i32(i32, <4 x i32>, i32, i32, i32) #0 declare i32 @llvm.amdgcn.raw.buffer.atomic.dec.i32(i32, <4 x i32>, i32, i32, i32) #0 declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) #0 +declare i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32) #0 attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll index 3ac9e84171450b..2b7ef147cae0f0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll @@ -108,6 +108,27 @@ main_body: ret float %v.float } +;CHECK-LABEL: {{^}}test5: +;CHECK-NOT: s_waitcnt +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 glc +;CHECK-DAG: s_waitcnt vmcnt(0) +;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, v4, s[0:3], 0 offen glc +;CHECK: s_waitcnt vmcnt(0) +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, v4, s[0:3], 0 offen offset:44 glc +;CHECK-DAG: s_waitcnt vmcnt(0) +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:4 glc +define amdgpu_ps float @test5(ptr addrspace(8) inreg %rsrc, i64 %data, i64 %cmp, i32 %vindex, i32 %voffset) { +main_body: + %o1 = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %data, i64 %cmp, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0) + %o3 = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %o1, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0) + %ofs.5 = add i32 %voffset, 44 + %o5 = call i64 
@llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %o3, i64 %cmp, ptr addrspace(8) %rsrc, i32 %ofs.5, i32 0, i32 0) + %o6 = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %o5, i64 %cmp, ptr addrspace(8) %rsrc, i32 4, i32 8188, i32 0) + %out = sitofp i64 %o6 to float + ret float %out +} + declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32) #0 declare float @llvm.amdgcn.raw.ptr.buffer.atomic.swap.f32(float, ptr addrspace(8), i32, i32, i32) #0 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32) #0 @@ -122,5 +143,6 @@ declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), i3 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.inc.i32(i32, ptr addrspace(8), i32, i32, i32) #0 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.dec.i32(i32, ptr addrspace(8), i32, i32, i32) #0 declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32) #0 +declare i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32) #0 attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll index fd1016f3379b74..8e709e846842e1 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll @@ -120,6 +120,33 @@ main_body: ret float %v.float } +;CHECK-LABEL: {{^}}test5: +;CHECK-NOT: s_waitcnt +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 idxen glc +;CHECK: s_waitcnt vmcnt(0) +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 idxen glc +;CHECK: s_waitcnt vmcnt(0) +;CHECK: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen glc +;CHECK: s_waitcnt vmcnt(0) +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen glc +;CHECK: s_waitcnt vmcnt(0) +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen offset:44 glc +;CHECK-DAG: s_waitcnt vmcnt(0) +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], [[SOFS]] idxen offset:4 glc +define amdgpu_ps float @test5(<4 x i32> inreg %rsrc, i64 %data, i64 %cmp, i32 %vindex, i32 %voffset) { +main_body: + %o1 = call i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64 %data, i64 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) + %o2 = call i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64 %o1, i64 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) + %o3 = call i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64 %o2, i64 %cmp, <4 x i32> %rsrc, i32 0, i32 %voffset, i32 0, i32 0) + %o4 = call i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64 %o3, i64 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 0, i32 0) + %offs.5 = add i32 %voffset, 44 + %o5 = call i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64 %o4, i64 %cmp, <4 x i32> %rsrc, i32 0, i32 %offs.5, i32 0, i32 0) + %o6 = call i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64 %o5, i64 %cmp, <4 x i32> %rsrc, i32 0, i32 4, i32 8188, i32 0) + %out = sitofp i64 %o6 to float + ret float %out +} + declare i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 declare float @llvm.amdgcn.struct.buffer.atomic.swap.f32(float, <4 x i32>, i32, i32, i32, i32) #0 
declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 @@ -134,5 +161,6 @@ declare i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, declare i32 @llvm.amdgcn.struct.buffer.atomic.inc.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 declare i32 @llvm.amdgcn.struct.buffer.atomic.dec.i32(i32, <4 x i32>, i32, i32, i32, i32) #0 declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) #0 +declare i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32, i32) #0 attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll index 480c24706c3c32..2888e2280de932 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll @@ -120,6 +120,33 @@ main_body: ret float %v.float } +;CHECK-LABEL: {{^}}test5: +;CHECK-NOT: s_waitcnt +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 idxen glc +;CHECK: s_waitcnt vmcnt(0) +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 idxen glc +;CHECK: s_waitcnt vmcnt(0) +;CHECK: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen glc +;CHECK: s_waitcnt vmcnt(0) +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen glc +;CHECK: s_waitcnt vmcnt(0) +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen offset:44 glc +;CHECK-DAG: s_waitcnt vmcnt(0) +;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], [[SOFS]] idxen offset:4 glc +define amdgpu_ps float @test5(ptr addrspace(8) inreg %rsrc, i64 %data, i64 %cmp, i32 %vindex, i32 %voffset) { +main_body: + %o1 = call i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64 %data, i64 %cmp, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) + %o2 = call i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64 %o1, i64 %cmp, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) + %o3 = call i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64 %o2, i64 %cmp, ptr addrspace(8) %rsrc, i32 0, i32 %voffset, i32 0, i32 0) + %o4 = call i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64 %o3, i64 %cmp, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 0, i32 0) + %offs.5 = add i32 %voffset, 44 + %o5 = call i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64 %o4, i64 %cmp, ptr addrspace(8) %rsrc, i32 0, i32 %offs.5, i32 0, i32 0) + %o6 = call i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64 %o5, i64 %cmp, ptr addrspace(8) %rsrc, i32 0, i32 4, i32 8188, i32 0) + %out = sitofp i64 %o6 to float + ret float %out +} + declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0 declare float @llvm.amdgcn.struct.ptr.buffer.atomic.swap.f32(float, ptr addrspace(8), i32, i32, i32, i32) #0 declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0 @@ -134,5 +161,6 @@ declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.inc.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0 declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.dec.i32(i32, ptr addrspace(8), i32, i32, i32, 
i32) #0 declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32, i32) #0 +declare i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32, i32) #0 attributes #0 = { nounwind } From f706837e2b85c7e5f29b118d0ecac41ba23e226f Mon Sep 17 00:00:00 2001 From: Valentin Clement (バレンタイン クレメン) Date: Mon, 30 Oct 2023 09:51:42 -0700 Subject: [PATCH 044/144] [flang][mlir][openacc] Switch device_type representation to an enum (#70250) Switch the representation from a scalar integer to an enumeration. The parser transforms the string in the input to the correct enumeration. --- flang/include/flang/Parser/dump-parse-tree.h | 1 + flang/include/flang/Parser/parse-tree.h | 4 +- flang/lib/Lower/OpenACC.cpp | 95 +++++++++++-------- flang/lib/Parser/openacc-parsers.cpp | 12 ++- flang/test/Lower/OpenACC/acc-init.f90 | 10 +- flang/test/Lower/OpenACC/acc-set.f90 | 8 +- flang/test/Lower/OpenACC/acc-shutdown.f90 | 6 +- flang/test/Lower/OpenACC/acc-update.f90 | 9 +- flang/test/Semantics/OpenACC/acc-data.f90 | 2 +- .../Semantics/OpenACC/acc-init-validity.f90 | 8 +- .../Semantics/OpenACC/acc-kernels-loop.f90 | 4 +- flang/test/Semantics/OpenACC/acc-kernels.f90 | 4 +- flang/test/Semantics/OpenACC/acc-parallel.f90 | 6 +- .../Semantics/OpenACC/acc-set-validity.f90 | 8 +- .../OpenACC/acc-shutdown-validity.f90 | 8 +- .../Semantics/OpenACC/acc-update-validity.f90 | 2 +- .../mlir/Dialect/OpenACC/OpenACCOps.td | 45 ++++++--- mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 5 +- mlir/test/Dialect/OpenACC/invalid.mlir | 2 +- mlir/test/Dialect/OpenACC/ops.mlir | 20 ++-- 20 files changed, 146 insertions(+), 113 deletions(-) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 494e54faa64c84..7c479a2334ea55 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -101,6 +101,7 @@ class ParseTreeDumper { NODE(parser, AccSelfClause) NODE(parser, AccStandaloneDirective) NODE(parser, AccDeviceTypeExpr) + NODE_ENUM(parser::AccDeviceTypeExpr, Device) NODE(parser, AccDeviceTypeExprList) NODE(parser, AccTileExpr) NODE(parser, AccTileExprList) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 83c8db936934a0..4806fc49f3441d 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4061,9 +4061,9 @@ struct AccWaitArgument { }; struct AccDeviceTypeExpr { - TUPLE_CLASS_BOILERPLATE(AccDeviceTypeExpr); + ENUM_CLASS(Device, Star, Default, Nvidia, Radeon, Host, Multicore) + WRAPPER_CLASS_BOILERPLATE(AccDeviceTypeExpr, Device); CharBlock source; - std::tuple> t; // if null then * }; struct AccDeviceTypeExprList { diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 5c29c781417301..3f7ef5e5747126 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -150,7 +150,7 @@ static void createDeclareAllocFuncWithArg(mlir::OpBuilder &modBuilder, builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds, /*structured=*/false, /*implicit=*/true, mlir::acc::DataClause::acc_update_device, descTy); - llvm::SmallVector operandSegments{0, 0, 0, 0, 0, 1}; + llvm::SmallVector operandSegments{0, 0, 0, 0, 1}; llvm::SmallVector operands{updateDeviceOp.getResult()}; createSimpleOp(builder, loc, operands,
operandSegments); @@ -219,7 +219,7 @@ static void createDeclareDeallocFuncWithArg( builder, loc, loadOp, asFortran, bounds, /*structured=*/false, /*implicit=*/true, mlir::acc::DataClause::acc_update_device, loadOp.getType()); - llvm::SmallVector operandSegments{0, 0, 0, 0, 0, 1}; + llvm::SmallVector operandSegments{0, 0, 0, 0, 1}; llvm::SmallVector operands{updateDeviceOp.getResult()}; createSimpleOp(builder, loc, operands, operandSegments); modBuilder.setInsertionPointAfter(postDeallocOp); @@ -1416,27 +1416,35 @@ static void genAsyncClause(Fortran::lower::AbstractConverter &converter, } } -static void genDeviceTypeClause( - Fortran::lower::AbstractConverter &converter, mlir::Location clauseLocation, +static mlir::acc::DeviceType +getDeviceType(Fortran::parser::AccDeviceTypeExpr::Device device) { + switch (device) { + case Fortran::parser::AccDeviceTypeExpr::Device::Star: + return mlir::acc::DeviceType::Star; + case Fortran::parser::AccDeviceTypeExpr::Device::Default: + return mlir::acc::DeviceType::Default; + case Fortran::parser::AccDeviceTypeExpr::Device::Nvidia: + return mlir::acc::DeviceType::Nvidia; + case Fortran::parser::AccDeviceTypeExpr::Device::Radeon: + return mlir::acc::DeviceType::Radeon; + case Fortran::parser::AccDeviceTypeExpr::Device::Host: + return mlir::acc::DeviceType::Host; + case Fortran::parser::AccDeviceTypeExpr::Device::Multicore: + return mlir::acc::DeviceType::Multicore; + } + return mlir::acc::DeviceType::Default; +} + +static void gatherDeviceTypeAttrs( + fir::FirOpBuilder &builder, mlir::Location clauseLocation, const Fortran::parser::AccClause::DeviceType *deviceTypeClause, - llvm::SmallVectorImpl &operands, + llvm::SmallVector &deviceTypes, Fortran::lower::StatementContext &stmtCtx) { const Fortran::parser::AccDeviceTypeExprList &deviceTypeExprList = deviceTypeClause->v; - for (const auto &deviceTypeExpr : deviceTypeExprList.v) { - const auto &expr = std::get>( - deviceTypeExpr.t); - if (expr) { - operands.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(expr), stmtCtx, &clauseLocation))); - } else { - // * was passed as value and will be represented as a special constant. - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Value star = firOpBuilder.createIntegerConstant( - clauseLocation, firOpBuilder.getIndexType(), starCst); - operands.push_back(star); - } - } + for (const auto &deviceTypeExpr : deviceTypeExprList.v) + deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( + builder.getContext(), getDeviceType(deviceTypeExpr.v))); } static void genIfClause(Fortran::lower::AbstractConverter &converter, @@ -2443,10 +2451,10 @@ genACCInitShutdownOp(Fortran::lower::AbstractConverter &converter, mlir::Location currentLocation, const Fortran::parser::AccClauseList &accClauseList) { mlir::Value ifCond, deviceNum; - llvm::SmallVector deviceTypeOperands; - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); Fortran::lower::StatementContext stmtCtx; + llvm::SmallVector deviceTypes; // Lower clauses values mapped to operands. 
// Keep track of each group of operands separately as clauses can appear @@ -2464,19 +2472,23 @@ genACCInitShutdownOp(Fortran::lower::AbstractConverter &converter, } else if (const auto *deviceTypeClause = std::get_if( &clause.u)) { - genDeviceTypeClause(converter, clauseLocation, deviceTypeClause, - deviceTypeOperands, stmtCtx); + gatherDeviceTypeAttrs(builder, clauseLocation, deviceTypeClause, + deviceTypes, stmtCtx); } } // Prepare the operand segment size attribute and the operands value range. llvm::SmallVector operands; - llvm::SmallVector operandSegments; - addOperands(operands, operandSegments, deviceTypeOperands); + llvm::SmallVector operandSegments; + addOperand(operands, operandSegments, deviceNum); addOperand(operands, operandSegments, ifCond); - createSimpleOp(firOpBuilder, currentLocation, operands, operandSegments); + Op op = + createSimpleOp(builder, currentLocation, operands, operandSegments); + if (!deviceTypes.empty()) + op.setDeviceTypesAttr( + mlir::ArrayAttr::get(builder.getContext(), deviceTypes)); } void genACCSetOp(Fortran::lower::AbstractConverter &converter, @@ -2485,8 +2497,9 @@ void genACCSetOp(Fortran::lower::AbstractConverter &converter, mlir::Value ifCond, deviceNum, defaultAsync; llvm::SmallVector deviceTypeOperands; - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); Fortran::lower::StatementContext stmtCtx; + llvm::SmallVector deviceTypes; // Lower clauses values mapped to operands. // Keep track of each group of operands separately as clauses can appear @@ -2509,21 +2522,24 @@ void genACCSetOp(Fortran::lower::AbstractConverter &converter, } else if (const auto *deviceTypeClause = std::get_if( &clause.u)) { - genDeviceTypeClause(converter, clauseLocation, deviceTypeClause, - deviceTypeOperands, stmtCtx); + gatherDeviceTypeAttrs(builder, clauseLocation, deviceTypeClause, + deviceTypes, stmtCtx); } } // Prepare the operand segment size attribute and the operands value range. llvm::SmallVector operands; - llvm::SmallVector operandSegments; - addOperands(operands, operandSegments, deviceTypeOperands); + llvm::SmallVector operandSegments; addOperand(operands, operandSegments, defaultAsync); addOperand(operands, operandSegments, deviceNum); addOperand(operands, operandSegments, ifCond); - createSimpleOp(firOpBuilder, currentLocation, operands, - operandSegments); + auto op = createSimpleOp(builder, currentLocation, operands, + operandSegments); + if (!deviceTypes.empty()) { + assert(deviceTypes.size() == 1 && "expect only one value for acc.set"); + op.setDeviceTypeAttr(mlir::cast(deviceTypes[0])); + } } static void @@ -2535,6 +2551,7 @@ genACCUpdateOp(Fortran::lower::AbstractConverter &converter, mlir::Value ifCond, async, waitDevnum; llvm::SmallVector dataClauseOperands, updateHostOperands, waitOperands, deviceTypeOperands; + llvm::SmallVector deviceTypes; // Async and wait clause have optional values but can be present with // no value as well. 
When there is no value, the op has an attribute to @@ -2563,8 +2580,8 @@ genACCUpdateOp(Fortran::lower::AbstractConverter &converter, } else if (const auto *deviceTypeClause = std::get_if( &clause.u)) { - genDeviceTypeClause(converter, clauseLocation, deviceTypeClause, - deviceTypeOperands, stmtCtx); + gatherDeviceTypeAttrs(builder, clauseLocation, deviceTypeClause, + deviceTypes, stmtCtx); } else if (const auto *hostClause = std::get_if(&clause.u)) { genDataOperandOperations( @@ -2602,11 +2619,13 @@ genACCUpdateOp(Fortran::lower::AbstractConverter &converter, addOperand(operands, operandSegments, async); addOperand(operands, operandSegments, waitDevnum); addOperands(operands, operandSegments, waitOperands); - addOperands(operands, operandSegments, deviceTypeOperands); addOperands(operands, operandSegments, dataClauseOperands); mlir::acc::UpdateOp updateOp = createSimpleOp( builder, currentLocation, operands, operandSegments); + if (!deviceTypes.empty()) + updateOp.setDeviceTypesAttr( + mlir::ArrayAttr::get(builder.getContext(), deviceTypes)); genDataExitOperations( builder, updateHostOperands, /*structured=*/false); @@ -2787,7 +2806,7 @@ static void createDeclareAllocFunc(mlir::OpBuilder &modBuilder, builder, loc, addrOp, asFortranDesc, bounds, /*structured=*/false, /*implicit=*/true, mlir::acc::DataClause::acc_update_device, addrOp.getType()); - llvm::SmallVector operandSegments{0, 0, 0, 0, 0, 1}; + llvm::SmallVector operandSegments{0, 0, 0, 0, 1}; llvm::SmallVector operands{updateDeviceOp.getResult()}; createSimpleOp(builder, loc, operands, operandSegments); @@ -2863,7 +2882,7 @@ static void createDeclareDeallocFunc(mlir::OpBuilder &modBuilder, builder, loc, addrOp, asFortran, bounds, /*structured=*/false, /*implicit=*/true, mlir::acc::DataClause::acc_update_device, addrOp.getType()); - llvm::SmallVector operandSegments{0, 0, 0, 0, 0, 1}; + llvm::SmallVector operandSegments{0, 0, 0, 0, 1}; llvm::SmallVector operands{updateDeviceOp.getResult()}; createSimpleOp(builder, loc, operands, operandSegments); modBuilder.setInsertionPointAfter(postDeallocOp); diff --git a/flang/lib/Parser/openacc-parsers.cpp b/flang/lib/Parser/openacc-parsers.cpp index 131f7332a69701..5b9267e0e17c6d 100644 --- a/flang/lib/Parser/openacc-parsers.cpp +++ b/flang/lib/Parser/openacc-parsers.cpp @@ -53,9 +53,15 @@ TYPE_PARSER(construct(scalarIntExpr) || construct("*" >> construct>())) TYPE_PARSER(construct(nonemptyList(Parser{}))) -TYPE_PARSER(construct(scalarIntExpr) || - construct( - "*" >> construct>())) +TYPE_PARSER(sourced(construct( + first("*" >> pure(AccDeviceTypeExpr::Device::Star), + "DEFAULT" >> pure(AccDeviceTypeExpr::Device::Default), + "NVIDIA" >> pure(AccDeviceTypeExpr::Device::Nvidia), + "ACC_DEVICE_NVIDIA" >> pure(AccDeviceTypeExpr::Device::Nvidia), + "RADEON" >> pure(AccDeviceTypeExpr::Device::Radeon), + "HOST" >> pure(AccDeviceTypeExpr::Device::Host), + "MULTICORE" >> pure(AccDeviceTypeExpr::Device::Multicore))))) + TYPE_PARSER( construct(nonemptyList(Parser{}))) diff --git a/flang/test/Lower/OpenACC/acc-init.f90 b/flang/test/Lower/OpenACC/acc-init.f90 index de940426b6f1c0..d1fd638c7ac0e8 100644 --- a/flang/test/Lower/OpenACC/acc-init.f90 +++ b/flang/test/Lower/OpenACC/acc-init.f90 @@ -4,6 +4,7 @@ ! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s subroutine acc_init + implicit none logical :: ifCondition = .TRUE. 
integer :: ifInt = 1 @@ -23,15 +24,16 @@ subroutine acc_init !CHECK: [[DEVNUM:%.*]] = arith.constant 1 : i32 !CHECK: acc.init device_num([[DEVNUM]] : i32){{$}} - !$acc init device_num(1) device_type(1, 2) + !$acc init device_num(1) device_type(host, multicore) !CHECK: [[DEVNUM:%.*]] = arith.constant 1 : i32 -!CHECK: [[DEVTYPE1:%.*]] = arith.constant 1 : i32 -!CHECK: [[DEVTYPE2:%.*]] = arith.constant 2 : i32 -!CHECK: acc.init device_type([[DEVTYPE1]], [[DEVTYPE2]] : i32, i32) device_num([[DEVNUM]] : i32){{$}} +!CHECK: acc.init device_num([[DEVNUM]] : i32) attributes {device_types = [#acc.device_type, #acc.device_type]} !$acc init if(ifInt) !CHECK: %[[IFINT:.*]] = fir.load %{{.*}} : !fir.ref !CHECK: %[[CONV:.*]] = fir.convert %[[IFINT]] : (i32) -> i1 !CHECK: acc.init if(%[[CONV]]) + !$acc init device_type(nvidia) +!CHECK: acc.init attributes {device_types = [#acc.device_type]} + end subroutine acc_init diff --git a/flang/test/Lower/OpenACC/acc-set.f90 b/flang/test/Lower/OpenACC/acc-set.f90 index 52baedeafecb2b..39bf26e0072b7c 100644 --- a/flang/test/Lower/OpenACC/acc-set.f90 +++ b/flang/test/Lower/OpenACC/acc-set.f90 @@ -14,7 +14,7 @@ program test_acc_set !$acc set device_type(*) -!$acc set device_type(0) +!$acc set device_type(multicore) end @@ -34,10 +34,8 @@ program test_acc_set ! CHECK: %[[C0:.*]] = arith.constant 0 : i32 ! CHECK: acc.set device_num(%[[C0]] : i32) -! CHECK: %[[C_1:.*]] = arith.constant -1 : index -! CHECK: acc.set device_type(%[[C_1]] : index) +! CHECK: acc.set attributes {device_type = #acc.device_type<*>} -! CHECK: %[[C0:.*]] = arith.constant 0 : i32 -! CHECK: acc.set device_type(%[[C0]] : i32) +! CHECK: acc.set attributes {device_type = #acc.device_type} diff --git a/flang/test/Lower/OpenACC/acc-shutdown.f90 b/flang/test/Lower/OpenACC/acc-shutdown.f90 index 49e1acc546d900..f63f5d62b4fe92 100644 --- a/flang/test/Lower/OpenACC/acc-shutdown.f90 +++ b/flang/test/Lower/OpenACC/acc-shutdown.f90 @@ -22,10 +22,8 @@ subroutine acc_shutdown !CHECK: [[DEVNUM:%.*]] = arith.constant 1 : i32 !CHECK: acc.shutdown device_num([[DEVNUM]] : i32){{$}} - !$acc shutdown device_num(1) device_type(1, 2) + !$acc shutdown device_num(1) device_type(default, nvidia) !CHECK: [[DEVNUM:%.*]] = arith.constant 1 : i32 -!CHECK: [[DEVTYPE1:%.*]] = arith.constant 1 : i32 -!CHECK: [[DEVTYPE2:%.*]] = arith.constant 2 : i32 -!CHECK: acc.shutdown device_type([[DEVTYPE1]], [[DEVTYPE2]] : i32, i32) device_num([[DEVNUM]] : i32){{$}} +!CHECK: acc.shutdown device_num([[DEVNUM]] : i32) attributes {device_types = [#acc.device_type, #acc.device_type]} end subroutine acc_shutdown diff --git a/flang/test/Lower/OpenACC/acc-update.f90 b/flang/test/Lower/OpenACC/acc-update.f90 index f7343a69285f85..5d5f5733ef7f1a 100644 --- a/flang/test/Lower/OpenACC/acc-update.f90 +++ b/flang/test/Lower/OpenACC/acc-update.f90 @@ -145,20 +145,17 @@ subroutine acc_update ! FIR: acc.update_host accPtr(%[[DEVPTR_A]] : !fir.ref>) bounds(%{{.*}}, %{{.*}}) to varPtr(%[[A]] : !fir.ref>) {name = "a", structured = false} ! HLFIR: acc.update_host accPtr(%[[DEVPTR_A]] : !fir.ref>) bounds(%{{.*}}, %{{.*}}) to varPtr(%[[DECLA]]#1 : !fir.ref>) {name = "a", structured = false} - !$acc update host(a) device_type(1, 2) + !$acc update host(a) device_type(default, host) ! FIR: %[[DEVPTR_A:.*]] = acc.getdeviceptr varPtr(%[[A]] : !fir.ref>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref> {dataClause = #acc, name = "a", structured = false} ! 
HLFIR: %[[DEVPTR_A:.*]] = acc.getdeviceptr varPtr(%[[DECLA]]#1 : !fir.ref>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref> {dataClause = #acc, name = "a", structured = false} -! CHECK: [[DEVTYPE1:%.*]] = arith.constant 1 : i32 -! CHECK: [[DEVTYPE2:%.*]] = arith.constant 2 : i32 -! CHECK: acc.update device_type([[DEVTYPE1]], [[DEVTYPE2]] : i32, i32) dataOperands(%[[DEVPTR_A]] : !fir.ref>){{$}} +! CHECK: acc.update dataOperands(%[[DEVPTR_A]] : !fir.ref>) attributes {device_types = [#acc.device_type, #acc.device_type]} ! FIR: acc.update_host accPtr(%[[DEVPTR_A]] : !fir.ref>) bounds(%{{.*}}, %{{.*}}) to varPtr(%[[A]] : !fir.ref>) {name = "a", structured = false} ! HLFIR: acc.update_host accPtr(%[[DEVPTR_A]] : !fir.ref>) bounds(%{{.*}}, %{{.*}}) to varPtr(%[[DECLA]]#1 : !fir.ref>) {name = "a", structured = false} !$acc update host(a) device_type(*) ! FIR: %[[DEVPTR_A:.*]] = acc.getdeviceptr varPtr(%[[A]] : !fir.ref>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref> {dataClause = #acc, name = "a", structured = false} ! HLFIR: %[[DEVPTR_A:.*]] = acc.getdeviceptr varPtr(%[[DECLA]]#1 : !fir.ref>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref> {dataClause = #acc, name = "a", structured = false} -! CHECK: [[DEVTYPE3:%.*]] = arith.constant -1 : index -! CHECK: acc.update device_type([[DEVTYPE3]] : index) dataOperands(%[[DEVPTR_A]] : !fir.ref>){{$}} +! CHECK: acc.update dataOperands(%[[DEVPTR_A]] : !fir.ref>) attributes {device_types = [#acc.device_type<*>]} ! FIR: acc.update_host accPtr(%[[DEVPTR_A]] : !fir.ref>) bounds(%{{.*}}, %{{.*}}) to varPtr(%[[A]] : !fir.ref>) {name = "a", structured = false} ! HLFIR: acc.update_host accPtr(%[[DEVPTR_A]] : !fir.ref>) bounds(%{{.*}}, %{{.*}}) to varPtr(%[[DECLA]]#1 : !fir.ref>) {name = "a", structured = false} diff --git a/flang/test/Semantics/OpenACC/acc-data.f90 b/flang/test/Semantics/OpenACC/acc-data.f90 index 17e0624b8cf24d..1a7a6f95f3d891 100644 --- a/flang/test/Semantics/OpenACC/acc-data.f90 +++ b/flang/test/Semantics/OpenACC/acc-data.f90 @@ -184,7 +184,7 @@ program openacc_data_validity !$acc data copy(aa) wait !$acc end data - !$acc data copy(aa) device_type(1) wait + !$acc data copy(aa) device_type(default) wait !$acc end data end program openacc_data_validity diff --git a/flang/test/Semantics/OpenACC/acc-init-validity.f90 b/flang/test/Semantics/OpenACC/acc-init-validity.f90 index f54898f73fdce2..3b594a25217c09 100644 --- a/flang/test/Semantics/OpenACC/acc-init-validity.f90 +++ b/flang/test/Semantics/OpenACC/acc-init-validity.f90 @@ -20,9 +20,9 @@ program openacc_init_validity !$acc init if(ifInt) !$acc init device_num(1) !$acc init device_num(i) - !$acc init device_type(i) - !$acc init device_type(2, i, j) - !$acc init device_num(i) device_type(i, j) if(ifCondition) + !$acc init device_type(default) + !$acc init device_type(nvidia, radeon) + !$acc init device_num(i) device_type(host, multicore) if(ifCondition) !$acc parallel !ERROR: Directive INIT may not be called within a compute region @@ -94,7 +94,7 @@ program openacc_init_validity !$acc init device_num(1) device_num(i) !ERROR: At most one DEVICE_TYPE clause can appear on the INIT directive - !$acc init device_type(2) device_type(i, j) + !$acc init device_type(nvidia) device_type(default, *) !ERROR: Must have LOGICAL or INTEGER type !$acc init if(ifReal) diff --git a/flang/test/Semantics/OpenACC/acc-kernels-loop.f90 b/flang/test/Semantics/OpenACC/acc-kernels-loop.f90 index 5facd473778803..1a280f7c54f5cd 100644 --- a/flang/test/Semantics/OpenACC/acc-kernels-loop.f90 +++ b/flang/test/Semantics/OpenACC/acc-kernels-loop.f90 @@ 
-264,12 +264,12 @@ program openacc_kernels_loop_validity a(i) = 3.14 end do - !$acc kernels loop device_type(1) + !$acc kernels loop device_type(multicore) do i = 1, N a(i) = 3.14 end do - !$acc kernels loop device_type(1, 3) + !$acc kernels loop device_type(host, multicore) do i = 1, N a(i) = 3.14 end do diff --git a/flang/test/Semantics/OpenACC/acc-kernels.f90 b/flang/test/Semantics/OpenACC/acc-kernels.f90 index a2c9c9e8be99b1..de220f7c7ddf7c 100644 --- a/flang/test/Semantics/OpenACC/acc-kernels.f90 +++ b/flang/test/Semantics/OpenACC/acc-kernels.f90 @@ -122,10 +122,10 @@ program openacc_kernels_validity !$acc kernels device_type(*) !$acc end kernels - !$acc kernels device_type(1) + !$acc kernels device_type(default) !$acc end kernels - !$acc kernels device_type(1, 3) + !$acc kernels device_type(default, host) !$acc end kernels !$acc kernels device_type(*) async wait num_gangs(8) num_workers(8) vector_length(128) diff --git a/flang/test/Semantics/OpenACC/acc-parallel.f90 b/flang/test/Semantics/OpenACC/acc-parallel.f90 index e85922e37c63e0..0e8d240d019983 100644 --- a/flang/test/Semantics/OpenACC/acc-parallel.f90 +++ b/flang/test/Semantics/OpenACC/acc-parallel.f90 @@ -111,10 +111,10 @@ program openacc_parallel_validity !$acc parallel device_type(*) !$acc end parallel - !$acc parallel device_type(1) + !$acc parallel device_type(default) !$acc end parallel - !$acc parallel device_type(1, 3) + !$acc parallel device_type(default, host) !$acc end parallel !ERROR: Clause PRIVATE is not allowed after clause DEVICE_TYPE on the PARALLEL directive @@ -131,7 +131,7 @@ program openacc_parallel_validity !$acc parallel device_type(*) num_gangs(8) !$acc end parallel - !$acc parallel device_type(1) async device_type(2) wait + !$acc parallel device_type(*) async device_type(host) wait !$acc end parallel !ERROR: Clause IF is not allowed after clause DEVICE_TYPE on the PARALLEL directive diff --git a/flang/test/Semantics/OpenACC/acc-set-validity.f90 b/flang/test/Semantics/OpenACC/acc-set-validity.f90 index 896e39df6535c9..74522b30d11bc4 100644 --- a/flang/test/Semantics/OpenACC/acc-set-validity.f90 +++ b/flang/test/Semantics/OpenACC/acc-set-validity.f90 @@ -90,17 +90,17 @@ program openacc_clause_validity !$acc set device_num(1) device_num(i) !ERROR: At most one DEVICE_TYPE clause can appear on the SET directive - !$acc set device_type(i) device_type(2) + !$acc set device_type(*) device_type(nvidia) !$acc set default_async(2) !$acc set default_async(i) !$acc set device_num(1) !$acc set device_num(i) - !$acc set device_type(i) - !$acc set device_num(1) default_async(2) device_type(2) + !$acc set device_type(default) + !$acc set device_num(1) default_async(2) device_type(*) !ERROR: The DEVICE_TYPE clause on the SET directive accepts only one value - !$acc set device_type(1, 2) + !$acc set device_type(*, default) !ERROR: At least one of DEFAULT_ASYNC, DEVICE_NUM, DEVICE_TYPE clause must appear on the SET directive !$acc set diff --git a/flang/test/Semantics/OpenACC/acc-shutdown-validity.f90 b/flang/test/Semantics/OpenACC/acc-shutdown-validity.f90 index de40963f99e048..43aed4fc98f42e 100644 --- a/flang/test/Semantics/OpenACC/acc-shutdown-validity.f90 +++ b/flang/test/Semantics/OpenACC/acc-shutdown-validity.f90 @@ -80,9 +80,9 @@ program openacc_shutdown_validity !$acc shutdown if(ifCondition) !$acc shutdown device_num(1) !$acc shutdown device_num(i) - !$acc shutdown device_type(i) - !$acc shutdown device_type(2, i, j) - !$acc shutdown device_num(i) device_type(i, j) if(ifCondition) + !$acc shutdown device_type(*) 
+ !$acc shutdown device_type(*, default, host) + !$acc shutdown device_num(i) device_type(default, host) if(ifCondition) !ERROR: At most one IF clause can appear on the SHUTDOWN directive !$acc shutdown if(.TRUE.) if(ifCondition) @@ -91,6 +91,6 @@ program openacc_shutdown_validity !$acc shutdown device_num(1) device_num(i) !ERROR: At most one DEVICE_TYPE clause can appear on the SHUTDOWN directive - !$acc shutdown device_type(2) device_type(i, j) + !$acc shutdown device_type(*) device_type(host, default) end program openacc_shutdown_validity diff --git a/flang/test/Semantics/OpenACC/acc-update-validity.f90 b/flang/test/Semantics/OpenACC/acc-update-validity.f90 index a409ba5ea549f8..1e75742e63e97b 100644 --- a/flang/test/Semantics/OpenACC/acc-update-validity.f90 +++ b/flang/test/Semantics/OpenACC/acc-update-validity.f90 @@ -53,7 +53,7 @@ program openacc_update_validity !$acc update host(bb) device_type(*) wait - !$acc update self(cc) device_type(1,2) async device_type(3) wait + !$acc update self(cc) device_type(host,multicore) async device_type(*) wait !ERROR: At most one IF clause can appear on the UPDATE directive !$acc update device(aa) if(.true.) if(ifCondition) diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 10018c9fc7e27e..3c5173fdda7f66 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -155,6 +155,30 @@ def DeclareActionAttr : OpenACC_Attr<"DeclareAction", "declare_action"> { let assemblyFormat = "`<` struct(params) `>`"; } +// Device type enumeration. +def OpenACC_DeviceTypeStar : I32EnumAttrCase<"Star", 0, "*">; +def OpenACC_DeviceTypeDefault : I32EnumAttrCase<"Default", 1, "default">; +def OpenACC_DeviceTypeHost : I32EnumAttrCase<"Host", 2, "host">; +def OpenACC_DeviceTypeMulticore : I32EnumAttrCase<"Multicore", 3, "multicore">; +def OpenACC_DeviceTypeNvidia : I32EnumAttrCase<"Nvidia", 4, "nvidia">; +def OpenACC_DeviceTypeRadeon : I32EnumAttrCase<"Radeon", 5, "radeon">; + + +def OpenACC_DeviceType : I32EnumAttr<"DeviceType", + "built-in device type supported by OpenACC", + [OpenACC_DeviceTypeStar, OpenACC_DeviceTypeDefault, + OpenACC_DeviceTypeHost, OpenACC_DeviceTypeMulticore, + OpenACC_DeviceTypeNvidia, OpenACC_DeviceTypeRadeon + ]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::acc"; +} +def OpenACC_DeviceTypeAttr : EnumAttr { + let assemblyFormat = [{ ```<` $value `>` }]; +} + // Used for data specification in data clauses (2.7.1). // Either (or both) extent and upperbound must be specified. 
def OpenACC_DataBoundsOp : OpenACC_Op<"bounds", @@ -1624,14 +1648,12 @@ def OpenACC_InitOp : OpenACC_Op<"init", [AttrSizedOperandSegments]> { ``` }]; - let arguments = (ins Variadic:$deviceTypeOperands, + let arguments = (ins OptionalAttr>:$device_types, Optional:$deviceNumOperand, Optional:$ifCond); let assemblyFormat = [{ - oilist( - `device_type` `(` $deviceTypeOperands `:` type($deviceTypeOperands) `)` - | `device_num` `(` $deviceNumOperand `:` type($deviceNumOperand) `)` + oilist(`device_num` `(` $deviceNumOperand `:` type($deviceNumOperand) `)` | `if` `(` $ifCond `)` ) attr-dict-with-keyword }]; @@ -1657,13 +1679,12 @@ def OpenACC_ShutdownOp : OpenACC_Op<"shutdown", [AttrSizedOperandSegments]> { ``` }]; - let arguments = (ins Variadic:$deviceTypeOperands, + let arguments = (ins OptionalAttr>:$device_types, Optional:$deviceNumOperand, Optional:$ifCond); let assemblyFormat = [{ - oilist(`device_type` `(` $deviceTypeOperands `:` type($deviceTypeOperands) `)` - |`device_num` `(` $deviceNumOperand `:` type($deviceNumOperand) `)` + oilist(`device_num` `(` $deviceNumOperand `:` type($deviceNumOperand) `)` |`if` `(` $ifCond `)` ) attr-dict-with-keyword }]; @@ -1687,15 +1708,13 @@ def OpenACC_SetOp : OpenACC_Op<"set", [AttrSizedOperandSegments]> { ``` }]; - let arguments = (ins Optional:$deviceType, + let arguments = (ins OptionalAttr:$device_type, Optional:$defaultAsync, Optional:$deviceNum, Optional:$ifCond); let assemblyFormat = [{ - oilist( - `device_type` `(` $deviceType `:` type($deviceType) `)` - | `default_async` `(` $defaultAsync `:` type($defaultAsync) `)` + oilist(`default_async` `(` $defaultAsync `:` type($defaultAsync) `)` | `device_num` `(` $deviceNum `:` type($deviceNum) `)` | `if` `(` $ifCond `)` ) attr-dict-with-keyword @@ -1729,7 +1748,7 @@ def OpenACC_UpdateOp : OpenACC_Op<"update", [AttrSizedOperandSegments]> { Variadic:$waitOperands, UnitAttr:$async, UnitAttr:$wait, - Variadic:$deviceTypeOperands, + OptionalAttr>:$device_types, Variadic:$dataClauseOperands, UnitAttr:$ifPresent); @@ -1746,8 +1765,6 @@ def OpenACC_UpdateOp : OpenACC_Op<"update", [AttrSizedOperandSegments]> { `if` `(` $ifCond `)` | `async` `(` $asyncOperand `:` type($asyncOperand) `)` | `wait_devnum` `(` $waitDevnum `:` type($waitDevnum) `)` - | `device_type` `(` $deviceTypeOperands `:` - type($deviceTypeOperands) `)` | `wait` `(` $waitOperands `:` type($waitOperands) `)` | `dataOperands` `(` $dataClauseOperands `:` type($dataClauseOperands) `)` ) diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 98c800033cbe91..d3747d33104091 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -1240,7 +1240,7 @@ LogicalResult acc::SetOp::verify() { while ((currOp = currOp->getParentOp())) if (isComputeOperation(currOp)) return emitOpError("cannot be nested in a compute operation"); - if (!getDeviceType() && !getDefaultAsync() && !getDeviceNum()) + if (!getDeviceTypeAttr() && !getDefaultAsync() && !getDeviceNum()) return emitOpError("at least one default_async, device_num, or device_type " "operand must appear"); return success(); @@ -1285,8 +1285,7 @@ Value UpdateOp::getDataOperand(unsigned i) { unsigned numOptional = getAsyncOperand() ? 1 : 0; numOptional += getWaitDevnum() ? 1 : 0; numOptional += getIfCond() ? 
1 : 0; - return getOperand(getWaitOperands().size() + getDeviceTypeOperands().size() + - numOptional + i); + return getOperand(getWaitOperands().size() + numOptional + i); } void UpdateOp::getCanonicalizationPatterns(RewritePatternSet &results, diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir index ff92eab478bb4f..b5241a8e4dc47f 100644 --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -475,7 +475,7 @@ acc.parallel num_gangs(%i64value, %i64value, %i64value, %i64value : i64, i64, i6 %i64value = arith.constant 1 : i64 acc.parallel { // expected-error@+1 {{'acc.set' op cannot be nested in a compute operation}} - acc.set device_type(%i64value : i64) + acc.set attributes {device_type = #acc.device_type} acc.yield } diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index d1950b1fb3f291..cf7a838f55ef85 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -974,7 +974,7 @@ func.func @testupdateop(%a: memref, %b: memref, %c: memref) -> () acc.update async(%idxValue: index) dataOperands(%0: memref) acc.update wait_devnum(%i64Value: i64) wait(%i32Value, %idxValue : i32, index) dataOperands(%0: memref) acc.update if(%ifCond) dataOperands(%0: memref) - acc.update device_type(%i32Value : i32) dataOperands(%0: memref) + acc.update dataOperands(%0: memref) attributes {acc.device_types = [#acc.device_type]} acc.update dataOperands(%0, %1, %2 : memref, memref, memref) acc.update dataOperands(%0, %1, %2 : memref, memref, memref) attributes {async} acc.update dataOperands(%0, %1, %2 : memref, memref, memref) attributes {wait} @@ -993,7 +993,7 @@ func.func @testupdateop(%a: memref, %b: memref, %c: memref) -> () // CHECK: acc.update async([[IDXVALUE]] : index) dataOperands(%{{.*}} : memref) // CHECK: acc.update wait_devnum([[I64VALUE]] : i64) wait([[I32VALUE]], [[IDXVALUE]] : i32, index) dataOperands(%{{.*}} : memref) // CHECK: acc.update if([[IFCOND]]) dataOperands(%{{.*}} : memref) -// CHECK: acc.update device_type([[I32VALUE]] : i32) dataOperands(%{{.*}} : memref) +// CHECK: acc.update dataOperands(%{{.*}} : memref) attributes {acc.device_types = [#acc.device_type]} // CHECK: acc.update dataOperands(%{{.*}}, %{{.*}}, %{{.*}} : memref, memref, memref) // CHECK: acc.update dataOperands(%{{.*}}, %{{.*}}, %{{.*}} : memref, memref, memref) attributes {async} // CHECK: acc.update dataOperands(%{{.*}}, %{{.*}}, %{{.*}} : memref, memref, memref) attributes {wait} @@ -1047,8 +1047,7 @@ acc.wait if(%ifCond) %idxValue = arith.constant 1 : index %ifCond = arith.constant true acc.init -acc.init device_type(%i32Value : i32) -acc.init device_type(%i32Value, %i32Value2 : i32, i32) +acc.init attributes {acc.device_types = [#acc.device_type]} acc.init device_num(%i64Value : i64) acc.init device_num(%i32Value : i32) acc.init device_num(%idxValue : index) @@ -1062,8 +1061,7 @@ acc.init device_num(%idxValue : index) if(%ifCond) // CHECK: [[IDXVALUE:%.*]] = arith.constant 1 : index // CHECK: [[IFCOND:%.*]] = arith.constant true // CHECK: acc.init -// CHECK: acc.init device_type([[I32VALUE]] : i32) -// CHECK: acc.init device_type([[I32VALUE]], [[I32VALUE2]] : i32, i32) +// CHECK: acc.init attributes {acc.device_types = [#acc.device_type]} // CHECK: acc.init device_num([[I64VALUE]] : i64) // CHECK: acc.init device_num([[I32VALUE]] : i32) // CHECK: acc.init device_num([[IDXVALUE]] : index) @@ -1079,8 +1077,7 @@ acc.init device_num(%idxValue : index) if(%ifCond) %idxValue = 
arith.constant 1 : index %ifCond = arith.constant true acc.shutdown -acc.shutdown device_type(%i32Value : i32) -acc.shutdown device_type(%i32Value, %i32Value2 : i32, i32) +acc.shutdown attributes {acc.device_types = [#acc.device_type]} acc.shutdown device_num(%i64Value : i64) acc.shutdown device_num(%i32Value : i32) acc.shutdown device_num(%idxValue : index) @@ -1094,8 +1091,7 @@ acc.shutdown device_num(%idxValue : index) if(%ifCond) // CHECK: [[IDXVALUE:%.*]] = arith.constant 1 : index // CHECK: [[IFCOND:%.*]] = arith.constant true // CHECK: acc.shutdown -// CHECK: acc.shutdown device_type([[I32VALUE]] : i32) -// CHECK: acc.shutdown device_type([[I32VALUE]], [[I32VALUE2]] : i32, i32) +// CHECK: acc.shutdown attributes {acc.device_types = [#acc.device_type]} // CHECK: acc.shutdown device_num([[I64VALUE]] : i64) // CHECK: acc.shutdown device_num([[I32VALUE]] : i32) // CHECK: acc.shutdown device_num([[IDXVALUE]] : index) @@ -1718,7 +1714,7 @@ func.func @compute3(%a: memref<10x10xf32>, %b: memref<10x10xf32>, %c: memref<10x %i32Value2 = arith.constant 2 : i32 %idxValue = arith.constant 1 : index %ifCond = arith.constant true -acc.set device_type(%i32Value : i32) +acc.set attributes {device_type = #acc.device_type} acc.set device_num(%i64Value : i64) acc.set device_num(%i32Value : i32) acc.set device_num(%idxValue : index) @@ -1730,7 +1726,7 @@ acc.set default_async(%i32Value : i32) // CHECK: [[I32VALUE2:%.*]] = arith.constant 2 : i32 // CHECK: [[IDXVALUE:%.*]] = arith.constant 1 : index // CHECK: [[IFCOND:%.*]] = arith.constant true -// CHECK: acc.set device_type([[I32VALUE]] : i32) +// CHECK: acc.set attributes {device_type = #acc.device_type} // CHECK: acc.set device_num([[I64VALUE]] : i64) // CHECK: acc.set device_num([[I32VALUE]] : i32) // CHECK: acc.set device_num([[IDXVALUE]] : index) From 284d136c4ade9469fcd0a391472789adb34e7a1e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 30 Oct 2023 09:58:51 -0700 Subject: [PATCH 045/144] [RISCV] Teach copyPhysReg to allow copies between GPR<->FPR32/FPR64 (#70525) This is needed because GISel emits copies instead of bitcasts like SelectionDAG. 
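For readers unfamiliar with the instructions involved: fmv.w.x/fmv.x.w (and fmv.d.x/fmv.x.d on RV64) move a raw bit pattern between the integer and FP register files with no numeric conversion, which is exactly what a register-class-crossing COPY needs. A minimal, self-contained C++ model of that semantics (illustrative names, not LLVM code):

```
#include <cstdint>
#include <cstring>

// Models fmv.w.x: reinterpret a 32-bit integer pattern as a float.
float fmv_w_x(uint32_t bits) {
  float f;
  std::memcpy(&f, &bits, sizeof f); // raw bit copy, no int->float conversion
  return f;
}

// Models fmv.x.w: read a float's bit pattern back as a 32-bit integer.
uint32_t fmv_x_w(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof bits);
  return bits;
}
```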
--- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 30 ++++++++++++++++ .../RISCV/GlobalISel/fpr-gpr-copy-rv32.ll | 19 +++++++++++ .../RISCV/GlobalISel/fpr-gpr-copy-rv64.ll | 34 +++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv64.ll diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 996ef1c6f574a5..412fb7e7f7fc16 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -471,6 +471,36 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + if (RISCV::FPR32RegClass.contains(DstReg) && + RISCV::GPRRegClass.contains(SrcReg)) { + BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + if (RISCV::GPRRegClass.contains(DstReg) && + RISCV::FPR32RegClass.contains(SrcReg)) { + BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + if (RISCV::FPR64RegClass.contains(DstReg) && + RISCV::GPRRegClass.contains(SrcReg)) { + assert(STI.getXLen() == 64 && "Unexpected GPR size"); + BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + if (RISCV::GPRRegClass.contains(DstReg) && + RISCV::FPR64RegClass.contains(SrcReg)) { + assert(STI.getXLen() == 64 && "Unexpected GPR size"); + BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + // VR->VR copies. if (RISCV::VRRegClass.contains(DstReg, SrcReg)) { copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V); diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv32.ll b/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv32.ll new file mode 100644 index 00000000000000..1757e5550f81ae --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv32.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+f -target-abi=ilp32 \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=RV32I %s + +; Test copying between FPR32 and GPR on RV32. +; FIXME: This test should be replaced with a more general calling convention +; test once we have more FP implemented. + +define float @fadd(float %x, float %y) { +; RV32I-LABEL: fadd: +; RV32I: # %bb.0: +; RV32I-NEXT: fmv.w.x fa5, a0 +; RV32I-NEXT: fmv.w.x fa4, a1 +; RV32I-NEXT: fadd.s fa5, fa5, fa4 +; RV32I-NEXT: fmv.x.w a0, fa5 +; RV32I-NEXT: ret + %a = fadd float %x, %y + ret float %a +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv64.ll b/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv64.ll new file mode 100644 index 00000000000000..2eca0ad66f5c6b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv64.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d -target-abi=lp64 \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=RV64I %s + +; Test copying between FPR64 and GPR on RV64. +; FIXME: This test should be replaced with a more general calling convention +; test once we have more FP implemented. 
+ +define double @fadd_f64(double %x, double %y) { +; RV64I-LABEL: fadd_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: fmv.d.x fa5, a0 +; RV64I-NEXT: fmv.d.x fa4, a1 +; RV64I-NEXT: fadd.d fa5, fa5, fa4 +; RV64I-NEXT: fmv.x.d a0, fa5 +; RV64I-NEXT: ret + %a = fadd double %x, %y + ret double %a + +; Test copying between FPR32 and GPR on RV64. +; FIXME: This test should be replaced with a more general calling convention +; test once we have more FP implemented. + +define float @fadd_f32(float %x, float %y) { +; RV32I-LABEL: fadd: +; RV32I: # %bb.0: +; RV32I-NEXT: fmv.d.x fa5, a0 +; RV32I-NEXT: fmv.d.x fa4, a1 +; RV32I-NEXT: fadd.d fa5, fa5, fa4 +; RV32I-NEXT: fmv.x.d a0, fa5 +; RV32I-NEXT: ret + %a = fadd float %x, %y + ret float %a +} From c42b640208aa74c65cef5943bc05522780a72723 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Mon, 30 Oct 2023 10:00:40 -0700 Subject: [PATCH 046/144] Fix the DEVELOPER_DIR computation (#70528) The code was incorrectly going in the wrong direction by removing one component instead of appending /Developer to it. Due to fallback mechanisms in xcrun this never seemed to have caused any issues.
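To make the directional mix-up concrete, here is a hedged sketch of the two computations using std::filesystem rather than llvm::sys::path (developerDir is an illustrative name, not LLDB API):

```
#include <filesystem>
namespace fs = std::filesystem;

// Given ".../Xcode.app/Contents", the developer dir lives one level *down*:
fs::path developerDir(const fs::path &contentsDir) {
  return contentsDir / "Developer"; // .../Xcode.app/Contents/Developer
}

// The old code effectively went one level *up* instead:
//   contentsDir.parent_path()  ==  ".../Xcode.app"
```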
--- clang/test/Modules/relative-resource-dir.m | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/Modules/relative-resource-dir.m b/clang/test/Modules/relative-resource-dir.m index 2efc61259fd789..e0d1b6d221d109 100644 --- a/clang/test/Modules/relative-resource-dir.m +++ b/clang/test/Modules/relative-resource-dir.m @@ -1,3 +1,4 @@ +// UNSUPPORTED: -zos, -aix // REQUIRES: shell // RUN: EXPECTED_RESOURCE_DIR=`%clang -print-resource-dir` && \ From 693941132e27d82f068acbb7f27e134989987de6 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 30 Oct 2023 10:07:58 -0700 Subject: [PATCH 048/144] [docs] mention that DenseMap has a SmallDenseMap variant (#70677) via https://github.com/llvm/llvm-project/pull/67699/files#r1375105711 --- llvm/docs/ProgrammersManual.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst index 22e95261e63925..5bc71bea77cdfc 100644 --- a/llvm/docs/ProgrammersManual.rst +++ b/llvm/docs/ProgrammersManual.rst @@ -2323,6 +2323,10 @@ construct, but cheap to compare against. The DenseMapInfo is responsible for defining the appropriate comparison and hashing methods for each alternate key type used. +DenseMap.h also contains a SmallDenseMap variant, that similar to +:ref:`SmallVector ` performs no heap allocation until the +number of elements in the template parameter N are exceeded. + .. _dss_valuemap: llvm/IR/ValueMap.h From c3f7ca78101b77fa522f059af520526ff878a5b0 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Mon, 30 Oct 2023 17:08:25 +0000 Subject: [PATCH 049/144] [lldb][Test] TestDataFormatterLibcxxChrono.py: skip test on older clang versions (#70544) These tests were failing on the LLDB public matrix build-bots for older clang versions: ``` clang-7: warning: argument unused during compilation: '-nostdlib++' [-Wunused-command-line-argument] error: invalid value 'c++20' in '-std=c++20' note: use 'c++98' or 'c++03' for 'ISO C++ 1998 with amendments' standard note: use 'gnu++98' or 'gnu++03' for 'ISO C++ 1998 with amendments and GNU extensions' standard note: use 'c++11' for 'ISO C++ 2011 with amendments' standard note: use 'gnu++11' for 'ISO C++ 2011 with amendments and GNU extensions' standard note: use 'c++14' for 'ISO C++ 2014 with amendments' standard note: use 'gnu++14' for 'ISO C++ 2014 with amendments and GNU extensions' standard note: use 'c++17' for 'ISO C++ 2017 with amendments' standard note: use 'gnu++17' for 'ISO C++ 2017 with amendments and GNU extensions' standard note: use 'c++2a' for 'Working draft for ISO C++ 2020' standard note: use 'gnu++2a' for 'Working draft for ISO C++ 2020 with GNU extensions' standard make: *** [main.o] Error 1 ``` The test fails because we try to compile it with `-std=c++20` (which is required for std::chrono::{days,weeks,months,years}) on clang versions that don't support the `-std=c++20` flag. We could change the test to conditionally compile the C++20 parts of the test based on the `-std=` flag and have two versions of the python tests, one for the C++11 chrono features and one for the C++20 features. This patch instead just disables the test on older clang versions (because it's simpler and we don't really lose important coverage). 
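For reference, the conditional-compilation alternative mentioned above would look roughly like this (a sketch, not the test's actual contents), guarding the C++20-only calendar durations so the same source still builds as C++11:

```
#include <chrono>

void use_calendar_durations() {
#if __cplusplus >= 202002L
  std::chrono::days d{1};   // std::chrono::{days,weeks,months,years}
  std::chrono::years y{1};  // only exist since C++20
  (void)d; (void)y;
#endif
}
```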
--- .../libcxx/chrono/TestDataFormatterLibcxxChrono.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/chrono/TestDataFormatterLibcxxChrono.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/chrono/TestDataFormatterLibcxxChrono.py index 076b0d07b88aec..b2f86817f3b0e8 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/chrono/TestDataFormatterLibcxxChrono.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/chrono/TestDataFormatterLibcxxChrono.py @@ -11,6 +11,7 @@ class LibcxxChronoDataFormatterTestCase(TestBase): @add_test_categories(["libc++"]) + @skipIf(compiler="clang", compiler_version=["<", "11.0"]) def test_with_run_command(self): """Test that that file and class static variables display correctly.""" self.build() From 77e88db6b7cd982769a852b0fedfc57b3374d1d9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 30 Oct 2023 10:12:56 -0700 Subject: [PATCH 050/144] [RISCV][GISel] Add missing curly brace to test. NFC --- llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv64.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv64.ll b/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv64.ll index 2eca0ad66f5c6b..287bbbad6d52d7 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv64.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/fpr-gpr-copy-rv64.ll @@ -16,6 +16,7 @@ define double @fadd_f64(double %x, double %y) { ; RV64I-NEXT: ret %a = fadd double %x, %y ret double %a +} ; Test copying between FPR32 and GPR on RV64. ; FIXME: This test should be replaced with a more general calling convention From 7358c26d6acaa6c393623fde7cbc70372d0c67a8 Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Mon, 30 Oct 2023 14:25:21 -0300 Subject: [PATCH 051/144] [flang] Check for overflows in RESHAPE folding (#68342) TotalElementCount() was modified to return std::optional, where std::nullopt means overflow occurred. Besides the additional check in RESHAPE folding, all callers of TotalElementCount() were changed, to also check for overflows. --- flang/include/flang/Evaluate/constant.h | 3 +- flang/include/flang/Evaluate/initial-image.h | 15 +++- flang/lib/Evaluate/constant.cpp | 31 ++++++-- flang/lib/Evaluate/fold-designator.cpp | 4 +- flang/lib/Evaluate/fold-implementation.h | 84 ++++++++++++-------- flang/lib/Evaluate/initial-image.cpp | 10 ++- flang/lib/Semantics/data-to-inits.cpp | 2 + flang/test/Semantics/reshape.f90 | 4 + 8 files changed, 107 insertions(+), 46 deletions(-) diff --git a/flang/include/flang/Evaluate/constant.h b/flang/include/flang/Evaluate/constant.h index 04474e2f49a0f8..8c841918bccbe8 100644 --- a/flang/include/flang/Evaluate/constant.h +++ b/flang/include/flang/Evaluate/constant.h @@ -46,7 +46,8 @@ inline int GetRank(const ConstantSubscripts &s) { return static_cast(s.size()); } -std::size_t TotalElementCount(const ConstantSubscripts &); +// Returns the number of elements of shape, if no overflow occurs. +std::optional TotalElementCount(const ConstantSubscripts &shape); // Validate dimension re-ordering like ORDER in RESHAPE. 
// On success, return a vector that can be used as dimOrder in diff --git a/flang/include/flang/Evaluate/initial-image.h b/flang/include/flang/Evaluate/initial-image.h index e1f9a68acb00c2..dc9a9bfbfdf220 100644 --- a/flang/include/flang/Evaluate/initial-image.h +++ b/flang/include/flang/Evaluate/initial-image.h @@ -22,7 +22,14 @@ namespace Fortran::evaluate { class InitialImage { public: - enum Result { Ok, NotAConstant, OutOfRange, SizeMismatch, LengthMismatch }; + enum Result { + Ok, + NotAConstant, + OutOfRange, + SizeMismatch, + LengthMismatch, + TooManyElems + }; explicit InitialImage(std::size_t bytes) : data_(bytes) {} InitialImage(InitialImage &&that) = default; @@ -60,7 +67,11 @@ class InitialImage { if (offset < 0 || offset + bytes > data_.size()) { return OutOfRange; } else { - auto elements{TotalElementCount(x.shape())}; + auto optElements{TotalElementCount(x.shape())}; + if (!optElements) { + return TooManyElems; + } + auto elements{*optElements}; auto elementBytes{bytes > 0 ? bytes / elements : 0}; if (elements * elementBytes != bytes) { return SizeMismatch; diff --git a/flang/lib/Evaluate/constant.cpp b/flang/lib/Evaluate/constant.cpp index 0e0d412118d3bb..a3bdefb76a414c 100644 --- a/flang/lib/Evaluate/constant.cpp +++ b/flang/lib/Evaluate/constant.cpp @@ -80,8 +80,18 @@ ConstantSubscript ConstantBounds::SubscriptsToOffset( return offset; } -std::size_t TotalElementCount(const ConstantSubscripts &shape) { - return static_cast(GetSize(shape)); +std::optional TotalElementCount(const ConstantSubscripts &shape) { + uint64_t size{1}; + for (auto dim : shape) { + CHECK(dim >= 0); + uint64_t osize{size}; + size = osize * dim; + if (size > std::numeric_limits::max() || + (dim != 0 && size / dim != osize)) { + return std::nullopt; + } + } + return static_cast(GetSize(shape)); } bool ConstantBounds::IncrementSubscripts( @@ -135,7 +145,7 @@ template ConstantBase::ConstantBase( std::vector &&x, ConstantSubscripts &&sh, Result res) : ConstantBounds(std::move(sh)), result_{res}, values_(std::move(x)) { - CHECK(size() == TotalElementCount(shape())); + CHECK(TotalElementCount(shape()) && size() == *TotalElementCount(shape())); } template @@ -149,7 +159,9 @@ bool ConstantBase::operator==(const ConstantBase &that) const { template auto ConstantBase::Reshape( const ConstantSubscripts &dims) const -> std::vector { - std::size_t n{TotalElementCount(dims)}; + std::optional optN{TotalElementCount(dims)}; + CHECK(optN); + uint64_t n{*optN}; CHECK(!empty() || n == 0); std::vector elements; auto iter{values().cbegin()}; @@ -209,7 +221,8 @@ template Constant>::Constant(ConstantSubscript len, std::vector> &&strings, ConstantSubscripts &&sh) : ConstantBounds(std::move(sh)), length_{len} { - CHECK(strings.size() == TotalElementCount(shape())); + CHECK(TotalElementCount(shape()) && + strings.size() == *TotalElementCount(shape())); values_.assign(strings.size() * length_, static_cast::value_type>(' ')); ConstantSubscript at{0}; @@ -236,7 +249,9 @@ bool Constant>::empty() const { template std::size_t Constant>::size() const { if (length_ == 0) { - return TotalElementCount(shape()); + std::optional n{TotalElementCount(shape())}; + CHECK(n); + return *n; } else { return static_cast(values_.size()) / length_; } @@ -274,7 +289,9 @@ auto Constant>::Substring( template auto Constant>::Reshape( ConstantSubscripts &&dims) const -> Constant { - std::size_t n{TotalElementCount(dims)}; + std::optional optN{TotalElementCount(dims)}; + CHECK(optN); + uint64_t n{*optN}; CHECK(!empty() || n == 0); std::vector elements; 
ConstantSubscript at{0}, diff --git a/flang/lib/Evaluate/fold-designator.cpp b/flang/lib/Evaluate/fold-designator.cpp index 7298b0a2fb10c5..6952436681f753 100644 --- a/flang/lib/Evaluate/fold-designator.cpp +++ b/flang/lib/Evaluate/fold-designator.cpp @@ -373,7 +373,9 @@ ConstantObjectPointer ConstantObjectPointer::From( FoldingContext &context, const Expr &expr) { auto extents{GetConstantExtents(context, expr)}; CHECK(extents); - std::size_t elements{TotalElementCount(*extents)}; + std::optional optElements{TotalElementCount(*extents)}; + CHECK(optElements); + uint64_t elements{*optElements}; CHECK(elements > 0); int rank{GetRank(*extents)}; ConstantSubscripts at(rank, 1); diff --git a/flang/lib/Evaluate/fold-implementation.h b/flang/lib/Evaluate/fold-implementation.h index 2a40018cd5a386..868b7b6990fd38 100644 --- a/flang/lib/Evaluate/fold-implementation.h +++ b/flang/lib/Evaluate/fold-implementation.h @@ -492,7 +492,13 @@ Expr FoldElementalIntrinsicHelper(FoldingContext &context, CHECK(rank == GetRank(shape)); // Compute all the scalar values of the results std::vector> results; - if (TotalElementCount(shape) > 0) { + std::optional n{TotalElementCount(shape)}; + if (!n) { + context.messages().Say( + "Too many elements in elemental intrinsic function result"_err_en_US); + return Expr{std::move(funcRef)}; + } + if (*n > 0) { ConstantBounds bounds{shape}; ConstantSubscripts resultIndex(rank, 1); ConstantSubscripts argIndex[]{std::get(*args)->lbounds()...}; @@ -879,33 +885,40 @@ template Expr Folder::RESHAPE(FunctionRef &&funcRef) { context_.messages().Say( "'shape=' argument must not have a negative extent"_err_en_US); } else { - int rank{GetRank(shape.value())}; - std::size_t resultElements{TotalElementCount(shape.value())}; - std::optional> dimOrder; - if (order) { - dimOrder = ValidateDimensionOrder(rank, *order); - } - std::vector *dimOrderPtr{dimOrder ? &dimOrder.value() : nullptr}; - if (order && !dimOrder) { - context_.messages().Say("Invalid 'order=' argument in RESHAPE"_err_en_US); - } else if (resultElements > source->size() && (!pad || pad->empty())) { + std::optional optResultElement{TotalElementCount(shape.value())}; + if (!optResultElement) { context_.messages().Say( - "Too few elements in 'source=' argument and 'pad=' " - "argument is not present or has null size"_err_en_US); + "'shape=' argument has too many elements"_err_en_US); } else { - Constant result{!source->empty() || !pad - ? source->Reshape(std::move(shape.value())) - : pad->Reshape(std::move(shape.value()))}; - ConstantSubscripts subscripts{result.lbounds()}; - auto copied{result.CopyFrom(*source, - std::min(source->size(), resultElements), subscripts, dimOrderPtr)}; - if (copied < resultElements) { - CHECK(pad); - copied += result.CopyFrom( - *pad, resultElements - copied, subscripts, dimOrderPtr); + int rank{GetRank(shape.value())}; + uint64_t resultElements{*optResultElement}; + std::optional> dimOrder; + if (order) { + dimOrder = ValidateDimensionOrder(rank, *order); + } + std::vector *dimOrderPtr{dimOrder ? &dimOrder.value() : nullptr}; + if (order && !dimOrder) { + context_.messages().Say( + "Invalid 'order=' argument in RESHAPE"_err_en_US); + } else if (resultElements > source->size() && (!pad || pad->empty())) { + context_.messages().Say( + "Too few elements in 'source=' argument and 'pad=' " + "argument is not present or has null size"_err_en_US); + } else { + Constant result{!source->empty() || !pad + ? 
source->Reshape(std::move(shape.value())) + : pad->Reshape(std::move(shape.value()))}; + ConstantSubscripts subscripts{result.lbounds()}; + auto copied{result.CopyFrom(*source, + std::min(source->size(), resultElements), subscripts, dimOrderPtr)}; + if (copied < resultElements) { + CHECK(pad); + copied += result.CopyFrom( + *pad, resultElements - copied, subscripts, dimOrderPtr); + } + CHECK(copied == resultElements); + return Expr{std::move(result)}; } - CHECK(copied == resultElements); - return Expr{std::move(result)}; } } // Invalid, prevent re-folding @@ -944,14 +957,19 @@ template Expr Folder::SPREAD(FunctionRef &&funcRef) { ConstantSubscripts shape{source->shape()}; shape.insert(shape.begin() + *dim - 1, *ncopies); Constant spread{source->Reshape(std::move(shape))}; - std::vector dimOrder; - for (int j{0}; j < sourceRank; ++j) { - dimOrder.push_back(j < *dim - 1 ? j : j + 1); - } - dimOrder.push_back(*dim - 1); - ConstantSubscripts at{spread.lbounds()}; // all 1 - spread.CopyFrom(*source, TotalElementCount(spread.shape()), at, &dimOrder); - return Expr{std::move(spread)}; + std::optional n{TotalElementCount(spread.shape())}; + if (!n) { + context_.messages().Say("Too many elements in SPREAD result"_err_en_US); + } else { + std::vector dimOrder; + for (int j{0}; j < sourceRank; ++j) { + dimOrder.push_back(j < *dim - 1 ? j : j + 1); + } + dimOrder.push_back(*dim - 1); + ConstantSubscripts at{spread.lbounds()}; // all 1 + spread.CopyFrom(*source, *n, at, &dimOrder); + return Expr{std::move(spread)}; + } } // Invalid, prevent re-folding return MakeInvalidIntrinsic(std::move(funcRef)); diff --git a/flang/lib/Evaluate/initial-image.cpp b/flang/lib/Evaluate/initial-image.cpp index a0fe4ec95da94d..3b0d738c422d4f 100644 --- a/flang/lib/Evaluate/initial-image.cpp +++ b/flang/lib/Evaluate/initial-image.cpp @@ -18,7 +18,11 @@ auto InitialImage::Add(ConstantSubscript offset, std::size_t bytes, if (offset < 0 || offset + bytes > data_.size()) { return OutOfRange; } else { - auto elements{TotalElementCount(x.shape())}; + auto optElements{TotalElementCount(x.shape())}; + if (!optElements) { + return TooManyElems; + } + auto elements{*optElements}; auto elementBytes{bytes > 0 ? 
bytes / elements : 0}; if (elements * elementBytes != bytes) { return SizeMismatch; @@ -89,7 +93,9 @@ class AsConstantHelper { } using Const = Constant; using Scalar = typename Const::Element; - std::size_t elements{TotalElementCount(extents_)}; + std::optional optElements{TotalElementCount(extents_)}; + CHECK(optElements); + uint64_t elements{*optElements}; std::vector typedValue(elements); auto elemBytes{ToInt64(type_.MeasureSizeInBytes( context_, GetRank(extents_) > 0, charLength_))}; diff --git a/flang/lib/Semantics/data-to-inits.cpp b/flang/lib/Semantics/data-to-inits.cpp index bc2d8147e91b55..86f5f399310c0f 100644 --- a/flang/lib/Semantics/data-to-inits.cpp +++ b/flang/lib/Semantics/data-to-inits.cpp @@ -462,6 +462,8 @@ bool DataInitializationCompiler::InitElement( "DATA statement value '%s' for '%s' has the wrong length"_warn_en_US, folded.AsFortran(), DescribeElement()); return true; + } else if (status == evaluate::InitialImage::TooManyElems) { + exprAnalyzer_.Say("DATA statement has too many elements"_err_en_US); } else { CHECK(exprAnalyzer_.context().AnyFatalError()); } diff --git a/flang/test/Semantics/reshape.f90 b/flang/test/Semantics/reshape.f90 index fb5e0023e2716e..ea302ceed66aad 100644 --- a/flang/test/Semantics/reshape.f90 +++ b/flang/test/Semantics/reshape.f90 @@ -49,6 +49,10 @@ program reshaper integer, parameter :: array21(I64_MAX - 2 : I64_MAX) = [1, 2, 3] integer, parameter :: array22(2) = RESHAPE(array21, [2]) + integer(8), parameter :: huge_shape(2) = [I64_MAX, I64_MAX] + !ERROR: 'shape=' argument has too many elements + integer :: array23(I64_MAX, I64_MAX) = RESHAPE([1, 2, 3], huge_shape) + !ERROR: Size of 'shape=' argument must not be greater than 15 CALL ext_sub(RESHAPE([(n, n=1,20)], & [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])) From 9a7c26a399a827a5eb6ec5a2c5895c1bb5b08c54 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 30 Oct 2023 10:27:57 -0700 Subject: [PATCH 052/144] [GISel] Restrict G_BSWAP to multiples of 16 bits. (#70245) This is consistent with the IR verifier and SelectionDAG's getNode. Update tests accordingly. I tried to keep some coverage of non-pow2 when possible. X86 didn't like a G_UNMERGE_VALUES from s48 to 3 s16 that got created when I tried s48. 
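The rule itself is simple; a standalone C++ model of the new verifier check (isLegalBswapWidth is an illustrative name, not the MachineVerifier API):

```
#include <cstdint>

// G_BSWAP swaps whole bytes, and the verifier now requires the scalar width
// to be a multiple of 16 bits: s16, s32, s48, s64 are accepted, while widths
// such as s8, s17, or s24 are rejected.
bool isLegalBswapWidth(uint64_t scalarSizeInBits) {
  return scalarSizeInBits % 16 == 0;
}
```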
--- llvm/lib/CodeGen/MachineVerifier.cpp | 6 + .../AArch64/GlobalISel/legalize-bswap.mir | 51 +++------ .../AMDGPU/GlobalISel/legalize-bswap.mir | 106 ++++++------------ .../CodeGen/X86/GlobalISel/legalize-bswap.mir | 31 +---- llvm/test/MachineVerifier/test_g_bswap.mir | 19 ++++ 5 files changed, 77 insertions(+), 136 deletions(-) create mode 100644 llvm/test/MachineVerifier/test_g_bswap.mir diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index f3e676b3a41a2c..dadaf60fa09da0 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1592,6 +1592,12 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { report("G_SEXT_INREG size must be less than source bit width", MI); break; } + case TargetOpcode::G_BSWAP: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + if (DstTy.getScalarSizeInBits() % 16 != 0) + report("G_BSWAP size must be a multiple of 16 bits", MI); + break; + } case TargetOpcode::G_SHUFFLE_VECTOR: { const MachineOperand &MaskOp = MI->getOperand(3); if (!MaskOp.isShuffleMask()) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir index 6111f496602893..fba0881d4e86f1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir @@ -110,48 +110,27 @@ body: | RET_ReallyLR implicit $q0 ... --- -name: bswap_s88 +name: bswap_s80 tracksRegLiveness: true body: | bb.0: liveins: $x0 - ; CHECK-LABEL: name: bswap_s88 + ; CHECK-LABEL: name: bswap_s80 ; CHECK: liveins: $x0 - ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK: [[BSWAP:%[0-9]+]]:_(s64) = G_BSWAP [[DEF]] - ; CHECK: [[BSWAP1:%[0-9]+]]:_(s64) = G_BSWAP [[DEF]] - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[BSWAP]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[BSWAP1]], [[C1]](s64) - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; CHECK: $x0 = COPY [[OR]](s64) - ; CHECK: RET_ReallyLR implicit $x0 - %val:_(s88) = G_IMPLICIT_DEF - %bswap:_(s88) = G_BSWAP %val + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(s64) = G_BSWAP [[DEF]] + ; CHECK-NEXT: [[BSWAP1:%[0-9]+]]:_(s64) = G_BSWAP [[DEF]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[BSWAP]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[BSWAP1]], [[C1]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] + ; CHECK-NEXT: $x0 = COPY [[OR]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %val:_(s80) = G_IMPLICIT_DEF + %bswap:_(s80) = G_BSWAP %val %trunc:_(s64) = G_TRUNC %bswap $x0 = COPY %trunc(s64) RET_ReallyLR implicit $x0 ... 
---- -name: bswap_s4 -tracksRegLiveness: true -body: | - bb.0: - liveins: $x0 - ; CHECK-LABEL: name: bswap_s4 - ; CHECK: liveins: $x0 - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[DEF]] - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32) - ; CHECK: %ext:_(s64) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: $x0 = COPY %ext(s64) - ; CHECK: RET_ReallyLR implicit $x0 - %val:_(s4) = G_IMPLICIT_DEF - %bswap:_(s4) = G_BSWAP %val - %ext:_(s64) = G_ZEXT %bswap - $x0 = COPY %ext(s64) - RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir index 2b855e33e96d4d..63235842de57bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir @@ -2,42 +2,6 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX7 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s ---- -name: bswap_s8 - -body: | - bb.0: - liveins: $vgpr0 - ; GFX7-LABEL: name: bswap_s8 - ; GFX7: liveins: $vgpr0 - ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32) - ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX7-NEXT: $vgpr0 = COPY [[OR]](s32) - ; GFX8-LABEL: name: bswap_s8 - ; GFX8: liveins: $vgpr0 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(s16) = G_BSWAP [[TRUNC]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[BSWAP]], [[C]](s16) - ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s8) = G_TRUNC %0 - %2:_(s8) = G_BSWAP %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 -... - --- name: bswap_s16 @@ -74,40 +38,6 @@ body: | $vgpr0 = COPY %3 ... 
---- -name: bswap_s24 - -body: | - bb.0: - liveins: $vgpr0 - ; GFX7-LABEL: name: bswap_s24 - ; GFX7: liveins: $vgpr0 - ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32) - ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX7-NEXT: $vgpr0 = COPY [[OR]](s32) - ; GFX8-LABEL: name: bswap_s24 - ; GFX8: liveins: $vgpr0 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[LSHR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s24) = G_TRUNC %0 - %2:_(s24) = G_BSWAP %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 -... - --- name: bswap_s32 @@ -438,3 +368,39 @@ body: | %1:_(<2 x s64>) = G_BSWAP %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... + +--- +name: bswap_s48 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GFX7-LABEL: name: bswap_s48 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] + ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] + ; GFX7-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[C]](s32) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; + ; GFX8-LABEL: name: bswap_s48 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] + ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[C]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s48) = G_TRUNC %0 + %2:_(s48) = G_BSWAP %1 + %3:_(s64) = G_ANYEXT %2 + $vgpr0_vgpr1 = COPY %3 +... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-bswap.mir index bdac19b090d223..5aa78f33aa98bd 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-bswap.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-bswap.mir @@ -2,37 +2,8 @@ # RUN: llc -mtriple=i386-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=X86-32 # RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=X86-64 -# test bswap for s16, s17, s32, and s64 +# test bswap for s16, s32, and s64 -... 
---- -name: test_bswap17 -body: | - bb.1: - ; X86-32-LABEL: name: test_bswap17 - ; X86-32: [[DEF:%[0-9]+]]:_(s17) = IMPLICIT_DEF - ; X86-32-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s17) - ; X86-32-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[ANYEXT]] - ; X86-32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 15 - ; X86-32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s8) - ; X86-32-NEXT: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[LSHR]](s32) - ; X86-32-NEXT: [[COPY:%[0-9]+]]:_(s17) = COPY [[TRUNC]](s17) - ; X86-32-NEXT: RET 0, implicit [[COPY]](s17) - ; X86-64-LABEL: name: test_bswap17 - ; X86-64: [[DEF:%[0-9]+]]:_(s17) = IMPLICIT_DEF - ; X86-64-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s17) - ; X86-64-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[ANYEXT]] - ; X86-64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 15 - ; X86-64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s8) - ; X86-64-NEXT: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[LSHR]](s32) - ; X86-64-NEXT: [[COPY:%[0-9]+]]:_(s17) = COPY [[TRUNC]](s17) - ; X86-64-NEXT: RET 0, implicit [[COPY]](s17) - %0:_(s17) = IMPLICIT_DEF - %1:_(s17) = G_BSWAP %0 - %2:_(s17) = COPY %1(s17) - RET 0, implicit %2 - -... --- name: test_bswap64 body: | diff --git a/llvm/test/MachineVerifier/test_g_bswap.mir b/llvm/test/MachineVerifier/test_g_bswap.mir new file mode 100644 index 00000000000000..679114f06a00e0 --- /dev/null +++ b/llvm/test/MachineVerifier/test_g_bswap.mir @@ -0,0 +1,19 @@ +#RUN: not --crash llc -mtriple=aarch64 -o - -global-isel -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s +# REQUIRES: aarch64-registered-target + +--- +name: test_bswap +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: +body: | + bb.0: + + %0:_(s17) = G_CONSTANT i32 17 + + ; CHECK: Bad machine code: G_BSWAP size must be a multiple of 16 bits + %1:_(s17) = G_BSWAP %0 + +... From e46dd6fbc0aff05884b5e3736bb4286a5914a2d3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 30 Oct 2023 18:31:33 +0100 Subject: [PATCH 053/144] Revert "[InstCombine] Simplify and/or of icmp eq with op replacement (#70335)" This reverts commit 1770a2e325192f1665018e21200596da1904a330. Stage 2 llvm-tblgen crashes when generating X86GenAsmWriter.inc and other files. 
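For context, the reverted fold reasoned that in `and (icmp eq a, b), x` the second operand may be simplified as if `a == b` held (dually for `or (icmp ne a, b), x`). A hedged C++ analogue of one of the affected test patterns (slt_and_max):

```
#include <climits>

// (x < y) && (x == SCHAR_MAX): under x == SCHAR_MAX, "x < y" becomes
// "SCHAR_MAX < y", which can never hold for a signed char y, so the whole
// conjunction is always false. The revert is motivated by a stage-2
// miscompile, not by the reasoning itself being unsound.
bool slt_and_max(signed char x, signed char y) {
  return x < y && x == SCHAR_MAX; // always false
}
```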
--- llvm/lib/Analysis/InstructionSimplify.cpp | 60 -- llvm/test/CodeGen/PowerPC/pr45448.ll | 10 +- .../div-by-0-guard-before-smul_ov.ll | 11 +- .../div-by-0-guard-before-umul_ov.ll | 11 +- llvm/test/Transforms/InstCombine/ispow2.ll | 24 +- .../InstSimplify/and-or-icmp-ctpop.ll | 10 +- .../InstSimplify/and-or-icmp-min-max.ll | 617 +++++++++++++----- .../InstSimplify/and-or-implied-cond.ll | 10 +- .../div-by-0-guard-before-smul_ov-not.ll | 13 +- .../div-by-0-guard-before-smul_ov.ll | 11 +- .../div-by-0-guard-before-umul_ov-not.ll | 13 +- .../div-by-0-guard-before-umul_ov.ll | 11 +- ...f-negative-is-non-zero-and-no-underflow.ll | 20 +- llvm/test/Transforms/PGOProfile/chr.ll | 7 +- 14 files changed, 587 insertions(+), 241 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index a68ca950e5232f..f0e60c9a2dac6f 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2025,52 +2025,6 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, Value *Op0, return nullptr; } -static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, - const SimplifyQuery &Q, - bool AllowRefinement, - SmallVectorImpl *DropFlags, - unsigned MaxRecurse); - -static Value *simplifyAndOrWithICmpEq(unsigned Opcode, Value *Op0, Value *Op1, - const SimplifyQuery &Q, - unsigned MaxRecurse) { - assert((Opcode == Instruction::And || Opcode == Instruction::Or) && - "Must be and/or"); - ICmpInst::Predicate Pred; - Value *A, *B; - if (!match(Op0, m_ICmp(Pred, m_Value(A), m_Value(B))) || - !ICmpInst::isEquality(Pred) || !MaxRecurse--) - return nullptr; - - auto Simplify = [&](Value *Res) -> Value * { - // and (icmp eq a, b), x implies (a==b) inside x. - // or (icmp ne a, b), x implies (a==b) inside x. - // If x simplifies to true/false, we can simplify the and/or. - if (Pred == - (Opcode == Instruction::And ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) - return simplifyBinOp(Opcode, Op0, Res, Q, MaxRecurse); - // If we have and (icmp ne a, b), x and for a==b we can simplify x to false, - // then we can drop the icmp, as x will already be false in the case where - // the icmp is false. Similar for or and true. - if (Res == ConstantExpr::getBinOpAbsorber(Opcode, Res->getType())) - return Op1; - return nullptr; - }; - - // Increment MaxRecurse again, because simplifyWithOpReplaced() does its own - // decrement. - if (Value *Res = - simplifyWithOpReplaced(Op1, A, B, Q, /* AllowRefinement */ true, - /* DropFlags */ nullptr, MaxRecurse + 1)) - return Simplify(Res); - if (Value *Res = - simplifyWithOpReplaced(Op1, B, A, Q, /* AllowRefinement */ true, - /* DropFlags */ nullptr, MaxRecurse + 1)) - return Simplify(Res); - - return nullptr; -} - /// Given a bitwise logic op, check if the operands are add/sub with a common /// source value and inverted constant (identity: C - X -> ~(X + ~C)). 
static Value *simplifyLogicOfAddSub(Value *Op0, Value *Op1, @@ -2205,13 +2159,6 @@ static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT)) return Constant::getNullValue(Op0->getType()); - if (Value *V = - simplifyAndOrWithICmpEq(Instruction::And, Op0, Op1, Q, MaxRecurse)) - return V; - if (Value *V = - simplifyAndOrWithICmpEq(Instruction::And, Op1, Op0, Q, MaxRecurse)) - return V; - if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, true)) return V; @@ -2488,13 +2435,6 @@ static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op0, m_LShr(m_Specific(X), m_Specific(Y)))) return Op1; - if (Value *V = - simplifyAndOrWithICmpEq(Instruction::Or, Op0, Op1, Q, MaxRecurse)) - return V; - if (Value *V = - simplifyAndOrWithICmpEq(Instruction::Or, Op1, Op0, Q, MaxRecurse)) - return V; - if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false)) return V; diff --git a/llvm/test/CodeGen/PowerPC/pr45448.ll b/llvm/test/CodeGen/PowerPC/pr45448.ll index 0f2dcb3ccc8a0b..6b3d578f6b3382 100644 --- a/llvm/test/CodeGen/PowerPC/pr45448.ll +++ b/llvm/test/CodeGen/PowerPC/pr45448.ll @@ -20,16 +20,20 @@ define hidden void @julia_tryparse_internal_45896() #0 { ; CHECK-NEXT: .LBB0_6: # %fail194 ; CHECK-NEXT: .LBB0_7: # %L670 ; CHECK-NEXT: li r5, -3 +; CHECK-NEXT: cmpdi r3, 0 ; CHECK-NEXT: sradi r4, r3, 63 ; CHECK-NEXT: rldic r5, r5, 4, 32 +; CHECK-NEXT: crnot 4*cr5+lt, eq ; CHECK-NEXT: mulhdu r3, r3, r5 ; CHECK-NEXT: maddld r6, r4, r5, r3 ; CHECK-NEXT: cmpld cr1, r6, r3 ; CHECK-NEXT: mulhdu. r3, r4, r5 +; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_10 +; CHECK-NEXT: # %bb.8: # %L670 ; CHECK-NEXT: crorc 4*cr5+lt, 4*cr1+lt, eq -; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_9 -; CHECK-NEXT: # %bb.8: # %L917 -; CHECK-NEXT: .LBB0_9: # %L994 +; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_10 +; CHECK-NEXT: # %bb.9: # %L917 +; CHECK-NEXT: .LBB0_10: # %L994 top: %0 = load i64, ptr undef, align 8 %1 = icmp ne i64 %0, 0 diff --git a/llvm/test/Transforms/InstCombine/div-by-0-guard-before-smul_ov.ll b/llvm/test/Transforms/InstCombine/div-by-0-guard-before-smul_ov.ll index 08eefbebb73634..23bfc75b945ba1 100644 --- a/llvm/test/Transforms/InstCombine/div-by-0-guard-before-smul_ov.ll +++ b/llvm/test/Transforms/InstCombine/div-by-0-guard-before-smul_ov.ll @@ -47,7 +47,11 @@ define i1 @n2_wrong_size(i4 %size0, i4 %size1, i4 %nmemb) { define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @n3_wrong_pred( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0 +; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[SMUL_OV]], [[CMP]] +; CHECK-NEXT: ret i1 [[AND]] ; %cmp = icmp eq i4 %size, 0 ; not 'ne' %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb) @@ -59,7 +63,10 @@ define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) { define i1 @n4_not_and(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @n4_not_and( ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1 +; CHECK-NEXT: [[AND:%.*]] = or i1 [[SMUL_OV]], [[CMP]] +; CHECK-NEXT: ret i1 [[AND]] ; %cmp = icmp ne i4 %size, 0 %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 
%nmemb) diff --git a/llvm/test/Transforms/InstCombine/div-by-0-guard-before-umul_ov.ll b/llvm/test/Transforms/InstCombine/div-by-0-guard-before-umul_ov.ll index 047f8855fe5cb8..dbc3b5e7a25be3 100644 --- a/llvm/test/Transforms/InstCombine/div-by-0-guard-before-umul_ov.ll +++ b/llvm/test/Transforms/InstCombine/div-by-0-guard-before-umul_ov.ll @@ -47,7 +47,11 @@ define i1 @n2_wrong_size(i4 %size0, i4 %size1, i4 %nmemb) { define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @n3_wrong_pred( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0 +; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[UMUL_OV]], [[CMP]] +; CHECK-NEXT: ret i1 [[AND]] ; %cmp = icmp eq i4 %size, 0 ; not 'ne' %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb) @@ -59,7 +63,10 @@ define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) { define i1 @n4_not_and(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @n4_not_and( ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1 +; CHECK-NEXT: [[AND:%.*]] = or i1 [[UMUL_OV]], [[CMP]] +; CHECK-NEXT: ret i1 [[AND]] ; %cmp = icmp ne i4 %size, 0 %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb) diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll index cc50c5cd1e6680..740f79cd32b39e 100644 --- a/llvm/test/Transforms/InstCombine/ispow2.ll +++ b/llvm/test/Transforms/InstCombine/ispow2.ll @@ -392,7 +392,9 @@ define i1 @is_pow2_ctpop_wrong_pred1(i32 %x) { ; CHECK-LABEL: @is_pow2_ctpop_wrong_pred1( ; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 2 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) %cmp = icmp ugt i32 %t0, 2 @@ -944,7 +946,9 @@ define i1 @is_pow2or0_ctpop_wrong_pred1(i32 %x) { ; CHECK-LABEL: @is_pow2or0_ctpop_wrong_pred1( ; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[T0]], 1 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[ISZERO]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) %cmp = icmp ne i32 %t0, 1 @@ -955,7 +959,11 @@ define i1 @is_pow2or0_ctpop_wrong_pred1(i32 %x) { define i1 @is_pow2or0_ctpop_wrong_pred2(i32 %x) { ; CHECK-LABEL: @is_pow2or0_ctpop_wrong_pred2( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[T0]], 1 +; CHECK-NEXT: [[ISZERO:%.*]] = icmp ne i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[ISZERO]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) %cmp = icmp ne i32 %t0, 1 @@ -1141,7 +1149,9 @@ define i1 @isnot_pow2nor0_ctpop_wrong_pred1(i32 %x) { ; CHECK-LABEL: @isnot_pow2nor0_ctpop_wrong_pred1( ; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]] ; 
CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[T0]], 1 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) %cmp = icmp eq i32 %t0, 1 @@ -1152,7 +1162,11 @@ define i1 @isnot_pow2nor0_ctpop_wrong_pred1(i32 %x) { define i1 @isnot_pow2nor0_ctpop_wrong_pred2(i32 %x) { ; CHECK-LABEL: @isnot_pow2nor0_ctpop_wrong_pred2( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[T0]], 1 +; CHECK-NEXT: [[NOTZERO:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) %cmp = icmp eq i32 %t0, 1 diff --git a/llvm/test/Transforms/InstSimplify/and-or-icmp-ctpop.ll b/llvm/test/Transforms/InstSimplify/and-or-icmp-ctpop.ll index 6fe8d29bd10bf5..6de97c3a7a76de 100644 --- a/llvm/test/Transforms/InstSimplify/and-or-icmp-ctpop.ll +++ b/llvm/test/Transforms/InstSimplify/and-or-icmp-ctpop.ll @@ -40,7 +40,11 @@ define <2 x i1> @eq_or_non_0_commute(<2 x i32> %x) { define i1 @eq_or_non_0_wrong_pred1(i32 %x) { ; CHECK-LABEL: @eq_or_non_0_wrong_pred1( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[T0]], 10 +; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[NOTZERO]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) %cmp = icmp ne i32 %t0, 10 @@ -86,7 +90,9 @@ define i1 @ne_and_is_0_wrong_pred1(i32 %x) { ; CHECK-LABEL: @ne_and_is_0_wrong_pred1( ; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[T0]], 10 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[ISZERO]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %t0 = tail call i32 @llvm.ctpop.i32(i32 %x) %cmp = icmp ne i32 %t0, 10 diff --git a/llvm/test/Transforms/InstSimplify/and-or-icmp-min-max.ll b/llvm/test/Transforms/InstSimplify/and-or-icmp-min-max.ll index 4e3832f31e5a4c..7ea1797c99898f 100644 --- a/llvm/test/Transforms/InstSimplify/and-or-icmp-min-max.ll +++ b/llvm/test/Transforms/InstSimplify/and-or-icmp-min-max.ll @@ -16,7 +16,10 @@ define i1 @slt_and_max(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_and_max( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp slt i8 %x, %y %cmpeq = icmp eq i8 %x, 127 @@ -26,7 +29,10 @@ define i1 @slt_and_max(i8 %x, i8 %y) { define <2 x i1> @slt_and_max_commute(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @slt_and_max_commute( -; CHECK-NEXT: ret <2 x i1> zeroinitializer +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq <2 x i8> [[X]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret <2 x i1> [[R]] ; %cmp = icmp slt <2 x i8> %x, %y %cmpeq = icmp eq <2 x i8> %x, @@ -36,7 +42,10 @@ define <2 x i1> @slt_and_max_commute(<2 x i8> %x, <2 x i8> %y) { define i1 @slt_swap_and_max(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_swap_and_max( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = 
icmp eq i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sgt i8 %y, %x %cmpeq = icmp eq i8 %x, 127 @@ -46,7 +55,10 @@ define i1 @slt_swap_and_max(i8 %x, i8 %y) { define i1 @slt_swap_and_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_swap_and_max_commute( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sgt i8 %y, %x %cmpeq = icmp eq i8 %x, 127 @@ -56,7 +68,10 @@ define i1 @slt_swap_and_max_commute(i8 %x, i8 %y) { define i1 @ult_and_max(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_and_max( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp ult i8 %x, %y %cmpeq = icmp eq i8 %x, 255 @@ -66,7 +81,10 @@ define i1 @ult_and_max(i8 %x, i8 %y) { define i1 @ult_and_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_and_max_commute( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp ult i8 %x, %y %cmpeq = icmp eq i8 %x, 255 @@ -76,7 +94,10 @@ define i1 @ult_and_max_commute(i8 %x, i8 %y) { define i1 @ult_swap_and_max(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_and_max( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp ugt i8 %y, %x %cmpeq = icmp eq i8 %x, 255 @@ -86,7 +107,10 @@ define i1 @ult_swap_and_max(i8 %x, i8 %y) { define i1 @ult_swap_and_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_and_max_commute( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp ugt i8 %y, %x %cmpeq = icmp eq i8 %x, 255 @@ -102,7 +126,10 @@ define i1 @ult_swap_and_max_commute(i8 %x, i8 %y) { define i1 @sgt_and_min(i9 %x, i9 %y) { ; CHECK-LABEL: @sgt_and_min( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i9 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i9 [[X]], -256 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sgt i9 %x, %y %cmpeq = icmp eq i9 %x, 256 @@ -112,7 +139,10 @@ define i1 @sgt_and_min(i9 %x, i9 %y) { define i1 @sgt_and_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_and_min_commute( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sgt i8 %x, %y %cmpeq = icmp eq i8 %x, 128 @@ -122,7 +152,10 @@ define i1 @sgt_and_min_commute(i8 %x, i8 %y) { define i1 @sgt_swap_and_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_and_min( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp slt i8 %y, %x %cmpeq = icmp 
eq i8 %x, 128 @@ -132,7 +165,10 @@ define i1 @sgt_swap_and_min(i8 %x, i8 %y) { define i1 @sgt_swap_and_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_and_min_commute( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp slt i8 %y, %x %cmpeq = icmp eq i8 %x, 128 @@ -188,7 +224,10 @@ define i1 @ugt_swap_and_min_commute(i8 %x, i8 %y) { define i1 @sge_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_or_not_max( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sge i8 %x, %y %cmpeq = icmp ne i8 %x, 127 @@ -198,7 +237,10 @@ define i1 @sge_or_not_max(i8 %x, i8 %y) { define i1 @sge_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_or_not_max_commute( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sge i8 %x, %y %cmpeq = icmp ne i8 %x, 127 @@ -208,7 +250,10 @@ define i1 @sge_or_not_max_commute(i8 %x, i8 %y) { define i1 @sge_swap_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_or_not_max( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sle i8 %y, %x %cmpeq = icmp ne i8 %x, 127 @@ -218,7 +263,10 @@ define i1 @sge_swap_or_not_max(i8 %x, i8 %y) { define i1 @sge_swap_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_or_not_max_commute( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sle i8 %y, %x %cmpeq = icmp ne i8 %x, 127 @@ -228,7 +276,10 @@ define i1 @sge_swap_or_not_max_commute(i8 %x, i8 %y) { define i1 @uge_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_or_not_max( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp uge i8 %x, %y %cmpeq = icmp ne i8 %x, 255 @@ -238,7 +289,10 @@ define i1 @uge_or_not_max(i8 %x, i8 %y) { define i1 @uge_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_or_not_max_commute( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp uge i8 %x, %y %cmpeq = icmp ne i8 %x, 255 @@ -248,7 +302,10 @@ define i1 @uge_or_not_max_commute(i8 %x, i8 %y) { define i1 @uge_swap_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_swap_or_not_max( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp ule i8 %y, %x %cmpeq = icmp ne i8 %x, 255 @@ -258,7 +315,10 @@ define i1 @uge_swap_or_not_max(i8 %x, i8 %y) { define 
i1 @uge_swap_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_swap_or_not_max_commute( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp ule i8 %y, %x %cmpeq = icmp ne i8 %x, 255 @@ -274,7 +334,10 @@ define i1 @uge_swap_or_not_max_commute(i8 %x, i8 %y) { define i1 @sle_or_not_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_or_not_min( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sle i8 %x, %y %cmpeq = icmp ne i8 %x, 128 @@ -284,7 +347,10 @@ define i1 @sle_or_not_min(i8 %x, i8 %y) { define i1 @sle_or_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_or_not_min_commute( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sle i8 %x, %y %cmpeq = icmp ne i8 %x, 128 @@ -294,7 +360,10 @@ define i1 @sle_or_not_min_commute(i8 %x, i8 %y) { define i1 @sle_swap_or_not_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_swap_or_not_min( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sge i8 %y, %x %cmpeq = icmp ne i8 %x, 128 @@ -304,7 +373,10 @@ define i1 @sle_swap_or_not_min(i8 %x, i8 %y) { define i1 @sle_swap_or_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_swap_or_not_min_commute( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sge i8 %y, %x %cmpeq = icmp ne i8 %x, 128 @@ -360,8 +432,10 @@ define i1 @ule_swap_or_not_min_commute(i8 %x, i8 %y) { define i1 @sge_and_max(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_and_max( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 127 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sge i8 %x, %y %cmpeq = icmp eq i8 %x, 127 @@ -371,8 +445,10 @@ define i1 @sge_and_max(i8 %x, i8 %y) { define i1 @sge_and_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_and_max_commute( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 127 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sge i8 %x, %y %cmpeq = icmp eq i8 %x, 127 @@ -382,8 +458,10 @@ define i1 @sge_and_max_commute(i8 %x, i8 %y) { define i1 @sge_swap_and_max(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_and_max( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 127 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sle i8 %y, %x %cmpeq = icmp 
eq i8 %x, 127 @@ -393,8 +471,10 @@ define i1 @sge_swap_and_max(i8 %x, i8 %y) { define i1 @sge_swap_and_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_and_max_commute( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 127 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sle i8 %y, %x %cmpeq = icmp eq i8 %x, 127 @@ -404,8 +484,10 @@ define i1 @sge_swap_and_max_commute(i8 %x, i8 %y) { define i1 @uge_and_max(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_and_max( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -1 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp uge i8 %x, %y %cmpeq = icmp eq i8 %x, 255 @@ -415,8 +497,10 @@ define i1 @uge_and_max(i8 %x, i8 %y) { define i1 @uge_and_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_and_max_commute( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -1 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp uge i8 %x, %y %cmpeq = icmp eq i8 %x, 255 @@ -426,8 +510,10 @@ define i1 @uge_and_max_commute(i8 %x, i8 %y) { define i1 @uge_swap_and_max(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_swap_and_max( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -1 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp ule i8 %y, %x %cmpeq = icmp eq i8 %x, 255 @@ -437,8 +523,10 @@ define i1 @uge_swap_and_max(i8 %x, i8 %y) { define i1 @uge_swap_and_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_swap_and_max_commute( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -1 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp ule i8 %y, %x %cmpeq = icmp eq i8 %x, 255 @@ -454,8 +542,10 @@ define i1 @uge_swap_and_max_commute(i8 %x, i8 %y) { define i1 @sle_and_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_and_min( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sle i8 %x, %y %cmpeq = icmp eq i8 %x, 128 @@ -465,8 +555,10 @@ define i1 @sle_and_min(i8 %x, i8 %y) { define i1 @sle_and_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_and_min_commute( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sle i8 %x, %y %cmpeq = icmp eq i8 %x, 128 @@ -476,8 +568,10 @@ define i1 @sle_and_min_commute(i8 %x, i8 %y) { define i1 @sle_swap_and_min(i8 %x, i8 %y) { ; 
CHECK-LABEL: @sle_swap_and_min( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sge i8 %y, %x %cmpeq = icmp eq i8 %x, 128 @@ -487,8 +581,10 @@ define i1 @sle_swap_and_min(i8 %x, i8 %y) { define i1 @sle_swap_and_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_swap_and_min_commute( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sge i8 %y, %x %cmpeq = icmp eq i8 %x, 128 @@ -924,8 +1020,10 @@ define i1 @ugt_swap_and_not_min_commute(i8 %x, i8 %y) { define i1 @slt_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_or_not_max( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp slt i8 %x, %y %cmpeq = icmp ne i8 %x, 127 @@ -935,8 +1033,10 @@ define i1 @slt_or_not_max(i8 %x, i8 %y) { define i1 @slt_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_or_not_max_commute( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp slt i8 %x, %y %cmpeq = icmp ne i8 %x, 127 @@ -946,8 +1046,10 @@ define i1 @slt_or_not_max_commute(i8 %x, i8 %y) { define i1 @slt_swap_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_swap_or_not_max( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sgt i8 %y, %x %cmpeq = icmp ne i8 %x, 127 @@ -957,8 +1059,10 @@ define i1 @slt_swap_or_not_max(i8 %x, i8 %y) { define i1 @slt_swap_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_swap_or_not_max_commute( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sgt i8 %y, %x %cmpeq = icmp ne i8 %x, 127 @@ -968,8 +1072,10 @@ define i1 @slt_swap_or_not_max_commute(i8 %x, i8 %y) { define i1 @ult_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_or_not_max( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp ult i8 %x, %y %cmpeq = icmp ne i8 %x, 255 @@ -979,8 +1085,10 @@ define i1 @ult_or_not_max(i8 %x, i8 %y) { define i1 @ult_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_or_not_max_commute( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 
[[X:%.*]], -1 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp ult i8 %x, %y %cmpeq = icmp ne i8 %x, 255 @@ -990,8 +1098,10 @@ define i1 @ult_or_not_max_commute(i8 %x, i8 %y) { define i1 @ult_swap_or_not_max(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_or_not_max( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp ugt i8 %y, %x %cmpeq = icmp ne i8 %x, 255 @@ -1001,8 +1111,10 @@ define i1 @ult_swap_or_not_max(i8 %x, i8 %y) { define i1 @ult_swap_or_not_max_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_or_not_max_commute( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp ugt i8 %y, %x %cmpeq = icmp ne i8 %x, 255 @@ -1018,8 +1130,10 @@ define i1 @ult_swap_or_not_max_commute(i8 %x, i8 %y) { define i1 @sgt_or_not_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_or_not_min( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sgt i8 %x, %y %cmpeq = icmp ne i8 %x, 128 @@ -1029,8 +1143,10 @@ define i1 @sgt_or_not_min(i8 %x, i8 %y) { define i1 @sgt_or_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_or_not_min_commute( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp sgt i8 %x, %y %cmpeq = icmp ne i8 %x, 128 @@ -1040,8 +1156,10 @@ define i1 @sgt_or_not_min_commute(i8 %x, i8 %y) { define i1 @sgt_swap_or_not_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_or_not_min( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp slt i8 %y, %x %cmpeq = icmp ne i8 %x, 128 @@ -1051,8 +1169,10 @@ define i1 @sgt_swap_or_not_min(i8 %x, i8 %y) { define i1 @sgt_swap_or_not_min_commute(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_or_not_min_commute( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp slt i8 %y, %x %cmpeq = icmp ne i8 %x, 128 @@ -1112,7 +1232,11 @@ define i1 @ugt_swap_or_not_min_commute(i823 %x, i823 %y) { define i1 @slt_and_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_and_max_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; 
CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp slt i8 %notx, %y @@ -1123,7 +1247,11 @@ define i1 @slt_and_max_not_op(i8 %x, i8 %y) { define <2 x i1> @slt_and_max_commute_not_op(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @slt_and_max_commute_not_op( -; CHECK-NEXT: ret <2 x i1> zeroinitializer +; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq <2 x i8> [[X]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret <2 x i1> [[R]] ; %notx = xor <2 x i8> %x, %cmp = icmp slt <2 x i8> %notx, %y @@ -1134,7 +1262,11 @@ define <2 x i1> @slt_and_max_commute_not_op(<2 x i8> %x, <2 x i8> %y) { define i1 @slt_swap_and_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_swap_and_max_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sgt i8 %y, %notx @@ -1145,7 +1277,11 @@ define i1 @slt_swap_and_max_not_op(i8 %x, i8 %y) { define i1 @slt_swap_and_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_swap_and_max_commute_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sgt i8 %y, %notx @@ -1156,7 +1292,11 @@ define i1 @slt_swap_and_max_commute_not_op(i8 %x, i8 %y) { define i1 @ult_and_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_and_max_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ult i8 %notx, %y @@ -1167,7 +1307,11 @@ define i1 @ult_and_max_not_op(i8 %x, i8 %y) { define i1 @ult_and_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_and_max_commute_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ult i8 %notx, %y @@ -1178,7 +1322,11 @@ define i1 @ult_and_max_commute_not_op(i8 %x, i8 %y) { define i1 @ult_swap_and_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_and_max_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ugt i8 %y, %notx @@ -1189,7 +1337,11 @@ define i1 @ult_swap_and_max_not_op(i8 %x, i8 %y) { define i1 @ult_swap_and_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_and_max_commute_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 
[[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ugt i8 %y, %notx @@ -1206,7 +1358,11 @@ define i1 @ult_swap_and_max_commute_not_op(i8 %x, i8 %y) { define i1 @sgt_and_min_not_op(i9 %x, i9 %y) { ; CHECK-LABEL: @sgt_and_min_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i9 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i9 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i9 [[X]], 255 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i9 %x, -1 %cmp = icmp sgt i9 %notx, %y @@ -1217,7 +1373,11 @@ define i1 @sgt_and_min_not_op(i9 %x, i9 %y) { define i1 @sgt_and_min_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_and_min_commute_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sgt i8 %notx, %y @@ -1228,7 +1388,11 @@ define i1 @sgt_and_min_commute_not_op(i8 %x, i8 %y) { define i1 @sgt_swap_and_min_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_and_min_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp slt i8 %y, %notx @@ -1239,7 +1403,11 @@ define i1 @sgt_swap_and_min_not_op(i8 %x, i8 %y) { define i1 @sgt_swap_and_min_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_and_min_commute_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp slt i8 %y, %notx @@ -1250,7 +1418,11 @@ define i1 @sgt_swap_and_min_commute_not_op(i8 %x, i8 %y) { define i1 @ugt_and_min_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_and_min_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ugt i8 %notx, %y @@ -1261,7 +1433,11 @@ define i1 @ugt_and_min_not_op(i8 %x, i8 %y) { define i1 @ugt_and_min_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_and_min_commute_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ugt i8 %notx, %y @@ -1272,7 +1448,11 @@ define i1 @ugt_and_min_commute_not_op(i8 %x, i8 %y) { define i1 @ugt_swap_and_min_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_swap_and_min_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[Y:%.*]], [[NOTX]] +; 
CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ult i8 %y, %notx @@ -1283,7 +1463,11 @@ define i1 @ugt_swap_and_min_not_op(i8 %x, i8 %y) { define i1 @ugt_swap_and_min_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_swap_and_min_commute_not_op( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ult i8 %y, %notx @@ -1300,7 +1484,11 @@ define i1 @ugt_swap_and_min_commute_not_op(i8 %x, i8 %y) { define i1 @sge_or_not_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_or_not_max_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sge i8 %notx, %y @@ -1311,7 +1499,11 @@ define i1 @sge_or_not_max_not_op(i8 %x, i8 %y) { define i1 @sge_or_not_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_or_not_max_commute_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sge i8 %notx, %y @@ -1322,7 +1514,11 @@ define i1 @sge_or_not_max_commute_not_op(i8 %x, i8 %y) { define i1 @sge_swap_or_not_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_or_not_max_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sle i8 %y, %notx @@ -1333,7 +1529,11 @@ define i1 @sge_swap_or_not_max_not_op(i8 %x, i8 %y) { define i1 @sge_swap_or_not_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_or_not_max_commute_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sle i8 %y, %notx @@ -1344,7 +1544,11 @@ define i1 @sge_swap_or_not_max_commute_not_op(i8 %x, i8 %y) { define i1 @uge_or_not_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_or_not_max_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp uge i8 %notx, %y @@ -1355,7 +1559,11 @@ define i1 @uge_or_not_max_not_op(i8 %x, i8 %y) { define i1 @uge_or_not_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_or_not_max_commute_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = 
icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp uge i8 %notx, %y @@ -1366,7 +1574,11 @@ define i1 @uge_or_not_max_commute_not_op(i8 %x, i8 %y) { define i1 @uge_swap_or_not_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_swap_or_not_max_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ule i8 %y, %notx @@ -1377,7 +1589,11 @@ define i1 @uge_swap_or_not_max_not_op(i8 %x, i8 %y) { define i1 @uge_swap_or_not_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_swap_or_not_max_commute_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ule i8 %y, %notx @@ -1394,7 +1610,11 @@ define i1 @uge_swap_or_not_max_commute_not_op(i8 %x, i8 %y) { define i1 @sle_or_not_min_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_or_not_min_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sle i8 %notx, %y @@ -1405,7 +1625,11 @@ define i1 @sle_or_not_min_not_op(i8 %x, i8 %y) { define i1 @sle_or_not_min_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_or_not_min_commute_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sle i8 %notx, %y @@ -1416,7 +1640,11 @@ define i1 @sle_or_not_min_commute_not_op(i8 %x, i8 %y) { define i1 @sle_swap_or_not_min_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_swap_or_not_min_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sge i8 %y, %notx @@ -1427,7 +1655,11 @@ define i1 @sle_swap_or_not_min_not_op(i8 %x, i8 %y) { define i1 @sle_swap_or_not_min_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sle_swap_or_not_min_commute_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sge i8 %y, %notx @@ -1438,7 +1670,11 @@ define i1 @sle_swap_or_not_min_commute_not_op(i8 %x, i8 %y) { define i1 @ule_or_not_min_not_op(i427 %x, i427 %y) { ; CHECK-LABEL: @ule_or_not_min_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i427 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i427 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i427 
[[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i427 %x, -1 %cmp = icmp ule i427 %notx, %y @@ -1449,7 +1685,11 @@ define i1 @ule_or_not_min_not_op(i427 %x, i427 %y) { define i1 @ule_or_not_min_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_or_not_min_commute_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ule i8 %notx, %y @@ -1460,7 +1700,11 @@ define i1 @ule_or_not_min_commute_not_op(i8 %x, i8 %y) { define i1 @ule_swap_or_not_min_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_swap_or_not_min_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp uge i8 %y, %notx @@ -1471,7 +1715,11 @@ define i1 @ule_swap_or_not_min_not_op(i8 %x, i8 %y) { define i1 @ule_swap_or_not_min_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ule_swap_or_not_min_commute_not_op( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp uge i8 %y, %notx @@ -1488,8 +1736,11 @@ define i1 @ule_swap_or_not_min_commute_not_op(i8 %x, i8 %y) { define i1 @sge_and_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_and_max_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sge i8 %notx, %y @@ -1500,8 +1751,11 @@ define i1 @sge_and_max_not_op(i8 %x, i8 %y) { define i1 @sge_and_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_and_max_commute_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sge i8 %notx, %y @@ -1512,8 +1766,11 @@ define i1 @sge_and_max_commute_not_op(i8 %x, i8 %y) { define i1 @sge_swap_and_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_and_max_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sle i8 %y, %notx @@ -1524,8 +1781,11 @@ define i1 @sge_swap_and_max_not_op(i8 %x, i8 %y) { define i1 @sge_swap_and_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sge_swap_and_max_commute_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = 
icmp eq i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sle i8 %y, %notx @@ -1536,8 +1796,11 @@ define i1 @sge_swap_and_max_commute_not_op(i8 %x, i8 %y) { define i1 @uge_and_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_and_max_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp uge i8 %notx, %y @@ -1548,8 +1811,11 @@ define i1 @uge_and_max_not_op(i8 %x, i8 %y) { define i1 @uge_and_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_and_max_commute_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp uge i8 %notx, %y @@ -1560,8 +1826,11 @@ define i1 @uge_and_max_commute_not_op(i8 %x, i8 %y) { define i1 @uge_swap_and_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_swap_and_max_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ule i8 %y, %notx @@ -1572,8 +1841,11 @@ define i1 @uge_swap_and_max_not_op(i8 %x, i8 %y) { define i1 @uge_swap_and_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @uge_swap_and_max_commute_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp eq i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = and i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ule i8 %y, %notx @@ -2156,8 +2428,11 @@ define i1 @ugt_swap_and_not_min_commute_not_op(i8 %x, i8 %y) { define i1 @slt_or_not_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_or_not_max_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp slt i8 %notx, %y @@ -2168,8 +2443,11 @@ define i1 @slt_or_not_max_not_op(i8 %x, i8 %y) { define i1 @slt_or_not_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_or_not_max_commute_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp 
ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp slt i8 %notx, %y @@ -2180,8 +2458,11 @@ define i1 @slt_or_not_max_commute_not_op(i8 %x, i8 %y) { define i1 @slt_swap_or_not_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_swap_or_not_max_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sgt i8 %y, %notx @@ -2192,8 +2473,11 @@ define i1 @slt_swap_or_not_max_not_op(i8 %x, i8 %y) { define i1 @slt_swap_or_not_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @slt_swap_or_not_max_commute_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -128 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -128 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sgt i8 %y, %notx @@ -2204,8 +2488,11 @@ define i1 @slt_swap_or_not_max_commute_not_op(i8 %x, i8 %y) { define i1 @ult_or_not_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_or_not_max_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ult i8 %notx, %y @@ -2216,8 +2503,11 @@ define i1 @ult_or_not_max_not_op(i8 %x, i8 %y) { define i1 @ult_or_not_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_or_not_max_commute_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ult i8 %notx, %y @@ -2228,8 +2518,11 @@ define i1 @ult_or_not_max_commute_not_op(i8 %x, i8 %y) { define i1 @ult_swap_or_not_max_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_or_not_max_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ugt i8 %y, %notx @@ -2240,8 +2533,11 @@ define i1 @ult_swap_or_not_max_not_op(i8 %x, i8 %y) { define i1 @ult_swap_or_not_max_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ult_swap_or_not_max_commute_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ugt i8 %y, %notx @@ 
-2258,8 +2554,11 @@ define i1 @ult_swap_or_not_max_commute_not_op(i8 %x, i8 %y) { define i1 @sgt_or_not_min_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_or_not_min_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sgt i8 %notx, %y @@ -2270,8 +2569,11 @@ define i1 @sgt_or_not_min_not_op(i8 %x, i8 %y) { define i1 @sgt_or_not_min_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_or_not_min_commute_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp sgt i8 %notx, %y @@ -2282,8 +2584,11 @@ define i1 @sgt_or_not_min_commute_not_op(i8 %x, i8 %y) { define i1 @sgt_swap_or_not_min_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_or_not_min_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp slt i8 %y, %notx @@ -2294,8 +2599,11 @@ define i1 @sgt_swap_or_not_min_not_op(i8 %x, i8 %y) { define i1 @sgt_swap_or_not_min_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @sgt_swap_or_not_min_commute_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], 127 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], 127 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp slt i8 %y, %notx @@ -2306,8 +2614,11 @@ define i1 @sgt_swap_or_not_min_commute_not_op(i8 %x, i8 %y) { define i1 @ugt_or_not_min_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_or_not_min_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ugt i8 %notx, %y @@ -2318,8 +2629,11 @@ define i1 @ugt_or_not_min_not_op(i8 %x, i8 %y) { define i1 @ugt_or_not_min_commute_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: @ugt_or_not_min_commute_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMPEQ]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ugt i8 %notx, %y @@ -2330,8 +2644,11 @@ define i1 @ugt_or_not_min_commute_not_op(i8 %x, i8 %y) { define i1 @ugt_swap_or_not_min_not_op(i8 %x, i8 %y) { ; CHECK-LABEL: 
@ugt_swap_or_not_min_not_op( -; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X:%.*]], -1 -; CHECK-NEXT: ret i1 [[CMPEQ]] +; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[Y:%.*]], [[NOTX]] +; CHECK-NEXT: [[CMPEQ:%.*]] = icmp ne i8 [[X]], -1 +; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[CMPEQ]] +; CHECK-NEXT: ret i1 [[R]] ; %notx = xor i8 %x, -1 %cmp = icmp ult i8 %y, %notx diff --git a/llvm/test/Transforms/InstSimplify/and-or-implied-cond.ll b/llvm/test/Transforms/InstSimplify/and-or-implied-cond.ll index 7af3138dcaf271..db38077e0c5130 100644 --- a/llvm/test/Transforms/InstSimplify/and-or-implied-cond.ll +++ b/llvm/test/Transforms/InstSimplify/and-or-implied-cond.ll @@ -236,7 +236,9 @@ define i1 @pr69050(i32 %arg, i32 %arg1) { ; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[ARG:%.*]], -1 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR]], [[ARG1:%.*]] ; CHECK-NEXT: [[ICMP:%.*]] = icmp ne i32 [[AND]], 0 -; CHECK-NEXT: ret i1 [[ICMP]] +; CHECK-NEXT: [[ICMP2:%.*]] = icmp ne i32 [[ARG]], -1 +; CHECK-NEXT: [[AND3:%.*]] = and i1 [[ICMP2]], [[ICMP]] +; CHECK-NEXT: ret i1 [[AND3]] ; %xor = xor i32 %arg, -1 %and = and i32 %xor, %arg1 @@ -249,7 +251,11 @@ define i1 @pr69050(i32 %arg, i32 %arg1) { define i1 @pr69091(i32 %arg, i32 %arg1) { ; CHECK-LABEL: @pr69091( ; CHECK-NEXT: [[ICMP:%.*]] = icmp ne i32 [[ARG:%.*]], -1 -; CHECK-NEXT: ret i1 [[ICMP]] +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[ARG]], 1 +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ADD]], [[ARG1:%.*]] +; CHECK-NEXT: [[ICMP2:%.*]] = icmp ne i32 [[MUL]], 0 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[ICMP]], [[ICMP2]] +; CHECK-NEXT: ret i1 [[OR]] ; %icmp = icmp ne i32 %arg, -1 %add = add i32 %arg, 1 diff --git a/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll b/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll index fa0b7f165a3adb..ef2416eccb839c 100644 --- a/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll +++ b/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll @@ -52,7 +52,12 @@ define i1 @n2_wrong_size(i4 %size0, i4 %size1, i4 %nmemb) { define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @n3_wrong_pred( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0 +; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1 +; CHECK-NEXT: [[PHITMP:%.*]] = xor i1 [[SMUL_OV]], true +; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], [[PHITMP]] +; CHECK-NEXT: ret i1 [[OR]] ; %cmp = icmp ne i4 %size, 0 ; not 'eq' %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb) @@ -65,7 +70,11 @@ define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) { define i1 @n4_not_and(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @n4_not_and( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1 +; CHECK-NEXT: [[PHITMP:%.*]] = xor i1 [[SMUL_OV]], true +; CHECK-NEXT: [[OR:%.*]] = and i1 [[CMP]], [[PHITMP]] +; CHECK-NEXT: ret i1 [[OR]] ; %cmp = icmp eq i4 %size, 0 %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb) diff --git a/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov.ll b/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov.ll index 001a24c0f026e2..84462567271ac5 
100644 --- a/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov.ll +++ b/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov.ll @@ -46,7 +46,11 @@ define i1 @n2_wrong_size(i4 %size0, i4 %size1, i4 %nmemb) { define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @n3_wrong_pred( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0 +; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[SMUL_OV]], [[CMP]] +; CHECK-NEXT: ret i1 [[AND]] ; %cmp = icmp eq i4 %size, 0 ; not 'ne' %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb) @@ -58,7 +62,10 @@ define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) { define i1 @n4_not_and(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @n4_not_and( ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1 +; CHECK-NEXT: [[AND:%.*]] = or i1 [[SMUL_OV]], [[CMP]] +; CHECK-NEXT: ret i1 [[AND]] ; %cmp = icmp ne i4 %size, 0 %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb) diff --git a/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll b/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll index a1c317ec8ee290..c332b35fc1874d 100644 --- a/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll +++ b/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll @@ -52,7 +52,12 @@ define i1 @n2_wrong_size(i4 %size0, i4 %size1, i4 %nmemb) { define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @n3_wrong_pred( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0 +; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1 +; CHECK-NEXT: [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true +; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], [[PHITMP]] +; CHECK-NEXT: ret i1 [[OR]] ; %cmp = icmp ne i4 %size, 0 ; not 'eq' %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb) @@ -65,7 +70,11 @@ define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) { define i1 @n4_not_and(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @n4_not_and( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1 +; CHECK-NEXT: [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true +; CHECK-NEXT: [[OR:%.*]] = and i1 [[CMP]], [[PHITMP]] +; CHECK-NEXT: ret i1 [[OR]] ; %cmp = icmp eq i4 %size, 0 %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb) diff --git a/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov.ll b/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov.ll index f75b8a6282764e..9ab0819e214e7e 100644 --- a/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov.ll +++ b/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov.ll @@ -46,7 +46,11 @@ define i1 @n2_wrong_size(i4 %size0, i4 %size1, i4 %nmemb) { define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) { ; CHECK-LABEL: 
@n3_wrong_pred( -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0 +; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[UMUL_OV]], [[CMP]] +; CHECK-NEXT: ret i1 [[AND]] ; %cmp = icmp eq i4 %size, 0 ; not 'ne' %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb) @@ -58,7 +62,10 @@ define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) { define i1 @n4_not_and(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @n4_not_and( ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1 +; CHECK-NEXT: [[AND:%.*]] = or i1 [[UMUL_OV]], [[CMP]] +; CHECK-NEXT: ret i1 [[AND]] ; %cmp = icmp ne i4 %size, 0 %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb) diff --git a/llvm/test/Transforms/InstSimplify/result-of-add-of-negative-is-non-zero-and-no-underflow.ll b/llvm/test/Transforms/InstSimplify/result-of-add-of-negative-is-non-zero-and-no-underflow.ll index 38bc66ff4d6924..abcbb78889f1d7 100644 --- a/llvm/test/Transforms/InstSimplify/result-of-add-of-negative-is-non-zero-and-no-underflow.ll +++ b/llvm/test/Transforms/InstSimplify/result-of-add-of-negative-is-non-zero-and-no-underflow.ll @@ -18,7 +18,10 @@ define i1 @t1(i8 %base, i8 %offset) { ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE]], [[OFFSET:%.*]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ult i8 [[ADJUSTED]], [[BASE]] +; CHECK-NEXT: [[R:%.*]] = or i1 [[NOT_NULL]], [[NO_UNDERFLOW]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp slt i8 %base, 0 call void @llvm.assume(i1 %cmp) @@ -36,7 +39,10 @@ define i1 @t2_commutative(i8 %base, i8 %offset) { ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE]], [[OFFSET:%.*]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[ADJUSTED]], 0 +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ugt i8 [[BASE]], [[ADJUSTED]] +; CHECK-NEXT: [[R:%.*]] = or i1 [[NOT_NULL]], [[NO_UNDERFLOW]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp slt i8 %base, 0 call void @llvm.assume(i1 %cmp) @@ -57,7 +63,10 @@ define i1 @t3(i8 %base, i8 %offset) { ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE]], [[OFFSET:%.*]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp eq i8 [[ADJUSTED]], 0 +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp uge i8 [[ADJUSTED]], [[BASE]] +; CHECK-NEXT: [[R:%.*]] = and i1 [[NOT_NULL]], [[NO_UNDERFLOW]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp slt i8 %base, 0 call void @llvm.assume(i1 %cmp) @@ -75,7 +84,10 @@ define i1 @t4_commutative(i8 %base, i8 %offset) { ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: [[ADJUSTED:%.*]] = add i8 [[BASE]], [[OFFSET:%.*]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp eq i8 [[ADJUSTED]], 0 +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ule i8 [[BASE]], [[ADJUSTED]] +; CHECK-NEXT: [[R:%.*]] = 
and i1 [[NOT_NULL]], [[NO_UNDERFLOW]] +; CHECK-NEXT: ret i1 [[R]] ; %cmp = icmp slt i8 %base, 0 call void @llvm.assume(i1 %cmp) diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll index c4030af943a903..c82800ec11a12e 100644 --- a/llvm/test/Transforms/PGOProfile/chr.ll +++ b/llvm/test/Transforms/PGOProfile/chr.ll @@ -1298,11 +1298,12 @@ define i32 @test_chr_14(ptr %i, ptr %j, i32 %sum0, i1 %pred, i32 %z) !prof !14 { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Z_FR:%.*]] = freeze i32 [[Z:%.*]] ; CHECK-NEXT: [[I0:%.*]] = load i32, ptr [[I:%.*]], align 4 -; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[Z_FR]], 1 -; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] +; CHECK-NEXT: [[V1_NOT:%.*]] = icmp eq i32 [[Z_FR]], 1 +; CHECK-NEXT: br i1 [[V1_NOT]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] ; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[PRED_FR:%.*]] = freeze i1 [[PRED:%.*]] ; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z_FR]], 0 -; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]] +; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED_FR]] ; CHECK-NEXT: br i1 [[V3_NONCHR]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]] ; CHECK: bb0.nonchr: ; CHECK-NEXT: call void @foo() From 0f8615f4dc568f4d7cbf73580eef3e78f64f3bd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 30 Oct 2023 10:35:43 -0700 Subject: [PATCH 054/144] [flang][openacc][openmp] Set correct location on atomic operations (#70680) The location set on atomic operations in both OpenMP and OpenACC was completely off. The real location needs to be created from the source CharBlock of the parse tree node of the respective atomic statement. This patch updates locations in lowering for atomic operations.
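In essence, each lowering entry point now builds its location from the statement's own source span instead of calling converter.getCurrentLocation(). A minimal sketch of the pattern (the names mirror the diff below; the surrounding lowering context is assumed):

  // Build the MLIR location from the parse-tree node's CharBlock so that
  // the emitted atomic operation points at the atomic statement itself.
  mlir::Location loc = converter.genLocation(atomicConstruct.source);
  // ... then thread `loc` through the atomic lowering helpers, e.g.:
  genOmpAccAtomicRead(converter, atomicRead, loc);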
--- flang/lib/Lower/DirectivesCommon.h | 65 ++++++++++++-------------- flang/lib/Lower/OpenACC.cpp | 14 ++++-- flang/lib/Lower/OpenMP.cpp | 19 ++++++-- flang/test/Lower/OpenACC/locations.f90 | 52 +++++++++++++++++++++ 4 files changed, 104 insertions(+), 46 deletions(-) diff --git a/flang/lib/Lower/DirectivesCommon.h b/flang/lib/Lower/DirectivesCommon.h index 1b231ee1b891ba..33b8198a2518be 100644 --- a/flang/lib/Lower/DirectivesCommon.h +++ b/flang/lib/Lower/DirectivesCommon.h @@ -132,10 +132,9 @@ static inline void genOmpAccAtomicCaptureStatement( mlir::Value toAddress, [[maybe_unused]] const AtomicListT *leftHandClauseList, [[maybe_unused]] const AtomicListT *rightHandClauseList, - mlir::Type elementType) { + mlir::Type elementType, mlir::Location loc) { // Generate `atomic.read` operation for atomic assigment statements fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Location currentLocation = converter.getCurrentLocation(); if constexpr (std::is_same()) { @@ -151,12 +150,11 @@ static inline void genOmpAccAtomicCaptureStatement( genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint, memoryOrder); firOpBuilder.create( - currentLocation, fromAddress, toAddress, - mlir::TypeAttr::get(elementType), hint, memoryOrder); + loc, fromAddress, toAddress, mlir::TypeAttr::get(elementType), hint, + memoryOrder); } else { firOpBuilder.create( - currentLocation, fromAddress, toAddress, - mlir::TypeAttr::get(elementType)); + loc, fromAddress, toAddress, mlir::TypeAttr::get(elementType)); } } @@ -166,11 +164,10 @@ template static inline void genOmpAccAtomicWriteStatement( Fortran::lower::AbstractConverter &converter, mlir::Value lhsAddr, mlir::Value rhsExpr, [[maybe_unused]] const AtomicListT *leftHandClauseList, - [[maybe_unused]] const AtomicListT *rightHandClauseList, + [[maybe_unused]] const AtomicListT *rightHandClauseList, mlir::Location loc, mlir::Value *evaluatedExprValue = nullptr) { // Generate `atomic.write` operation for atomic assignment statements fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Location currentLocation = converter.getCurrentLocation(); if constexpr (std::is_same()) { @@ -184,11 +181,10 @@ static inline void genOmpAccAtomicWriteStatement( if (rightHandClauseList) genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint, memoryOrder); - firOpBuilder.create(currentLocation, lhsAddr, - rhsExpr, hint, memoryOrder); + firOpBuilder.create(loc, lhsAddr, rhsExpr, hint, + memoryOrder); } else { - firOpBuilder.create(currentLocation, lhsAddr, - rhsExpr); + firOpBuilder.create(loc, lhsAddr, rhsExpr); } } @@ -200,7 +196,7 @@ static inline void genOmpAccAtomicUpdateStatement( mlir::Type varType, const Fortran::parser::Variable &assignmentStmtVariable, const Fortran::parser::Expr &assignmentStmtExpr, [[maybe_unused]] const AtomicListT *leftHandClauseList, - [[maybe_unused]] const AtomicListT *rightHandClauseList, + [[maybe_unused]] const AtomicListT *rightHandClauseList, mlir::Location loc, mlir::Operation *atomicCaptureOp = nullptr) { // Generate `atomic.update` operation for atomic assignment statements fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -302,7 +298,7 @@ static inline void genOmpAccAtomicUpdateStatement( /// Processes an atomic construct with write clause. 
template void genOmpAccAtomicWrite(Fortran::lower::AbstractConverter &converter, - const AtomicT &atomicWrite) { + const AtomicT &atomicWrite, mlir::Location loc) { const AtomicListT *rightHandClauseList = nullptr; const AtomicListT *leftHandClauseList = nullptr; if constexpr (std::is_same void genOmpAccAtomicRead(Fortran::lower::AbstractConverter &converter, - const AtomicT &atomicRead) { + const AtomicT &atomicRead, mlir::Location loc) { const AtomicListT *rightHandClauseList = nullptr; const AtomicListT *leftHandClauseList = nullptr; if constexpr (std::is_same(loc, toAddress.getType(), fromAddress); genOmpAccAtomicCaptureStatement(converter, fromAddress, toAddress, leftHandClauseList, rightHandClauseList, - elementType); + elementType, loc); } /// Processes an atomic construct with update clause. template void genOmpAccAtomicUpdate(Fortran::lower::AbstractConverter &converter, - const AtomicT &atomicUpdate) { + const AtomicT &atomicUpdate, mlir::Location loc) { const AtomicListT *rightHandClauseList = nullptr; const AtomicListT *leftHandClauseList = nullptr; if constexpr (std::is_same( converter, lhsAddr, varType, assignmentStmtVariable, assignmentStmtExpr, - leftHandClauseList, rightHandClauseList); + leftHandClauseList, rightHandClauseList, loc); } /// Processes an atomic construct with no clause - which implies update clause. template void genOmpAtomic(Fortran::lower::AbstractConverter &converter, - const AtomicT &atomicConstruct) { + const AtomicT &atomicConstruct, mlir::Location loc) { const AtomicListT &atomicClauseList = std::get(atomicConstruct.t); const auto &assignmentStmtExpr = std::get( @@ -420,15 +415,14 @@ void genOmpAtomic(Fortran::lower::AbstractConverter &converter, // the update clause is specified (for both OpenMP and OpenACC). genOmpAccAtomicUpdateStatement( converter, lhsAddr, varType, assignmentStmtVariable, assignmentStmtExpr, - &atomicClauseList, nullptr); + &atomicClauseList, nullptr, loc); } /// Processes an atomic construct with capture clause. 
template void genOmpAccAtomicCapture(Fortran::lower::AbstractConverter &converter, - const AtomicT &atomicCapture) { + const AtomicT &atomicCapture, mlir::Location loc) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Location currentLocation = converter.getCurrentLocation(); const Fortran::parser::AssignmentStmt &stmt1 = std::get(atomicCapture.t).v.statement; @@ -480,11 +474,10 @@ void genOmpAccAtomicCapture(Fortran::lower::AbstractConverter &converter, memoryOrder); genOmpAtomicHintAndMemoryOrderClauses(converter, rightHandClauseList, hint, memoryOrder); - atomicCaptureOp = firOpBuilder.create( - currentLocation, hint, memoryOrder); - } else { atomicCaptureOp = - firOpBuilder.create(currentLocation); + firOpBuilder.create(loc, hint, memoryOrder); + } else { + atomicCaptureOp = firOpBuilder.create(loc); } firOpBuilder.createBlock(&(atomicCaptureOp->getRegion(0))); @@ -499,11 +492,11 @@ void genOmpAccAtomicCapture(Fortran::lower::AbstractConverter &converter, genOmpAccAtomicCaptureStatement( converter, stmt1RHSArg, stmt1LHSArg, /*leftHandClauseList=*/nullptr, - /*rightHandClauseList=*/nullptr, elementType); + /*rightHandClauseList=*/nullptr, elementType, loc); genOmpAccAtomicUpdateStatement( converter, stmt1RHSArg, stmt2VarType, stmt2Var, stmt2Expr, /*leftHandClauseList=*/nullptr, - /*rightHandClauseList=*/nullptr, atomicCaptureOp); + /*rightHandClauseList=*/nullptr, loc, atomicCaptureOp); } else { // Atomic capture construct is of the form [capture-stmt, write-stmt] const Fortran::semantics::SomeExpr &fromExpr = @@ -512,11 +505,11 @@ void genOmpAccAtomicCapture(Fortran::lower::AbstractConverter &converter, genOmpAccAtomicCaptureStatement( converter, stmt1RHSArg, stmt1LHSArg, /*leftHandClauseList=*/nullptr, - /*rightHandClauseList=*/nullptr, elementType); + /*rightHandClauseList=*/nullptr, elementType, loc); genOmpAccAtomicWriteStatement( converter, stmt1RHSArg, stmt2RHSArg, /*leftHandClauseList=*/nullptr, - /*rightHandClauseList=*/nullptr); + /*rightHandClauseList=*/nullptr, loc); } } else { // Atomic capture construct is of the form [update-stmt, capture-stmt] @@ -527,19 +520,19 @@ void genOmpAccAtomicCapture(Fortran::lower::AbstractConverter &converter, genOmpAccAtomicCaptureStatement( converter, stmt1LHSArg, stmt2LHSArg, /*leftHandClauseList=*/nullptr, - /*rightHandClauseList=*/nullptr, elementType); + /*rightHandClauseList=*/nullptr, elementType, loc); firOpBuilder.setInsertionPointToStart(&block); genOmpAccAtomicUpdateStatement( converter, stmt1LHSArg, stmt1VarType, stmt1Var, stmt1Expr, /*leftHandClauseList=*/nullptr, - /*rightHandClauseList=*/nullptr, atomicCaptureOp); + /*rightHandClauseList=*/nullptr, loc, atomicCaptureOp); } firOpBuilder.setInsertionPointToEnd(&block); if constexpr (std::is_same()) { - firOpBuilder.create(currentLocation); + firOpBuilder.create(loc); } else { - firOpBuilder.create(currentLocation); + firOpBuilder.create(loc); } firOpBuilder.setInsertionPointToStart(&block); } diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 3f7ef5e5747126..8218af691b79c8 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -3306,25 +3306,29 @@ static void genACC(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenACCAtomicConstruct &atomicConstruct) { + + mlir::Location loc = converter.genLocation(atomicConstruct.source); std::visit( Fortran::common::visitors{ [&](const Fortran::parser::AccAtomicRead &atomicRead) { 
Fortran::lower::genOmpAccAtomicRead(converter, atomicRead); + void>(converter, atomicRead, + loc); }, [&](const Fortran::parser::AccAtomicWrite &atomicWrite) { Fortran::lower::genOmpAccAtomicWrite< - Fortran::parser::AccAtomicWrite, void>(converter, atomicWrite); + Fortran::parser::AccAtomicWrite, void>(converter, atomicWrite, + loc); }, [&](const Fortran::parser::AccAtomicUpdate &atomicUpdate) { Fortran::lower::genOmpAccAtomicUpdate< - Fortran::parser::AccAtomicUpdate, void>(converter, - atomicUpdate); + Fortran::parser::AccAtomicUpdate, void>(converter, atomicUpdate, + loc); }, [&](const Fortran::parser::AccAtomicCapture &atomicCapture) { Fortran::lower::genOmpAccAtomicCapture< Fortran::parser::AccAtomicCapture, void>(converter, - atomicCapture); + atomicCapture, loc); }, }, atomicConstruct.u); diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 0faaae6c08e047..1b9a03f74ac479 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -3060,29 +3060,38 @@ genOMP(Fortran::lower::AbstractConverter &converter, std::visit( Fortran::common::visitors{ [&](const Fortran::parser::OmpAtomicRead &atomicRead) { + mlir::Location loc = converter.genLocation(atomicRead.source); Fortran::lower::genOmpAccAtomicRead< Fortran::parser::OmpAtomicRead, - Fortran::parser::OmpAtomicClauseList>(converter, atomicRead); + Fortran::parser::OmpAtomicClauseList>(converter, atomicRead, + loc); }, [&](const Fortran::parser::OmpAtomicWrite &atomicWrite) { + mlir::Location loc = converter.genLocation(atomicWrite.source); Fortran::lower::genOmpAccAtomicWrite< Fortran::parser::OmpAtomicWrite, - Fortran::parser::OmpAtomicClauseList>(converter, atomicWrite); + Fortran::parser::OmpAtomicClauseList>(converter, atomicWrite, + loc); }, [&](const Fortran::parser::OmpAtomic &atomicConstruct) { + mlir::Location loc = converter.genLocation(atomicConstruct.source); Fortran::lower::genOmpAtomic( - converter, atomicConstruct); + converter, atomicConstruct, loc); }, [&](const Fortran::parser::OmpAtomicUpdate &atomicUpdate) { + mlir::Location loc = converter.genLocation(atomicUpdate.source); Fortran::lower::genOmpAccAtomicUpdate< Fortran::parser::OmpAtomicUpdate, - Fortran::parser::OmpAtomicClauseList>(converter, atomicUpdate); + Fortran::parser::OmpAtomicClauseList>(converter, atomicUpdate, + loc); }, [&](const Fortran::parser::OmpAtomicCapture &atomicCapture) { + mlir::Location loc = converter.genLocation(atomicCapture.source); Fortran::lower::genOmpAccAtomicCapture< Fortran::parser::OmpAtomicCapture, - Fortran::parser::OmpAtomicClauseList>(converter, atomicCapture); + Fortran::parser::OmpAtomicClauseList>(converter, atomicCapture, + loc); }, }, atomicConstruct.u); diff --git a/flang/test/Lower/OpenACC/locations.f90 b/flang/test/Lower/OpenACC/locations.f90 index 19788f9f6d1aa2..031d8eda48acdc 100644 --- a/flang/test/Lower/OpenACC/locations.f90 +++ b/flang/test/Lower/OpenACC/locations.f90 @@ -111,4 +111,56 @@ subroutine if_clause_expr_location(arr) !CHECK-NEXT: } loc("{{.*}}locations.f90":99:11) end subroutine + subroutine atomic_read_loc() + integer(4) :: x + integer(8) :: y + + !$acc atomic read + y = x + end + !CHECK: acc.atomic.read {{.*}} loc("{{.*}}locations.f90":118:11) + + subroutine atomic_capture_loc() + implicit none + integer :: k, v, i + + k = 1 + v = 0 + + !$acc atomic capture + v = k + k = (i + 1) * 3.14 + !$acc end atomic + +! CHECK: acc.atomic.capture { +! CHECK: acc.atomic.read {{.*}} loc("{{.*}}locations.f90":130:11) +! 
CHECK: acc.atomic.write {{.*}} loc("{{.*}}locations.f90":130:11) +! CHECK: } loc("{{.*}}locations.f90":130:11) + + end subroutine + + subroutine atomic_update_loc() + implicit none + integer :: x, y, z + + !$acc atomic + y = y + 1 +! CHECK: acc.atomic.update %{{.*}} : !fir.ref { +! CHECK: ^bb0(%{{.*}}: i32 loc("{{.*}}locations.f90":142:3)): +! CHECK: } loc("{{.*}}locations.f90":142:3) + + !$acc atomic update + z = x * z + + ! %3 = fir.load %0 : !fir.ref loc("/local/home/vclement/llvm-project/flang/test/Lower/OpenACC/locations.f90":142:3) + ! acc.atomic.update %2 : !fir.ref { + ! ^bb0(%arg0: i32 loc("/local/home/vclement/llvm-project/flang/test/Lower/OpenACC/locations.f90":142:3)): + ! %4 = arith.muli %3, %arg0 : i32 loc("/local/home/vclement/llvm-project/flang/test/Lower/OpenACC/locations.f90":142:3) + ! acc.yield %4 : i32 loc("/local/home/vclement/llvm-project/flang/test/Lower/OpenACC/locations.f90":142:3) + ! } loc("/local/home/vclement/llvm-project/flang/test/Lower/OpenACC/locations.f90":142:3) + end subroutine + + end module + + From b45236f13391dbe03fb9ea029b1a42e6dd989de4 Mon Sep 17 00:00:00 2001 From: DaPorkchop_ Date: Mon, 30 Oct 2023 19:15:36 +0100 Subject: [PATCH 055/144] [clang] Implement constexpr bit_cast for vectors (#66894) This makes __builtin_bit_cast support converting to and from vector types in a constexpr context. --- .../include/clang/Basic/DiagnosticASTKinds.td | 5 +- clang/lib/AST/ExprConstant.cpp | 269 ++++++++++++------ clang/lib/CodeGen/CGExprConstant.cpp | 3 + clang/test/CodeGen/const-init.c | 9 +- .../constexpr-builtin-bit-cast-fp80.cpp | 66 +++++ .../SemaCXX/constexpr-builtin-bit-cast.cpp | 44 +++ 6 files changed, 297 insertions(+), 99 deletions(-) create mode 100644 clang/test/SemaCXX/constexpr-builtin-bit-cast-fp80.cpp diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td index 492f6b270ca520..031117f2c4137a 100644 --- a/clang/include/clang/Basic/DiagnosticASTKinds.td +++ b/clang/include/clang/Basic/DiagnosticASTKinds.td @@ -317,7 +317,7 @@ def note_constexpr_memcpy_unsupported : Note< "source is not a contiguous array of at least %4 elements of type %3|" "destination is not a contiguous array of at least %4 elements of type %3}2">; def note_constexpr_bit_cast_unsupported_type : Note< - "constexpr bit_cast involving type %0 is not yet supported">; + "constexpr bit cast involving type %0 is not yet supported">; def note_constexpr_bit_cast_unsupported_bitfield : Note< "constexpr bit_cast involving bit-field is not yet supported">; def note_constexpr_bit_cast_invalid_type : Note< @@ -326,6 +326,9 @@ def note_constexpr_bit_cast_invalid_type : Note< "%select{type|member}1 is not allowed in a constant expression">; def note_constexpr_bit_cast_invalid_subtype : Note< "invalid type %0 is a %select{member|base}1 of %2">; +def note_constexpr_bit_cast_invalid_vector : Note< + "bit_cast involving type %0 is not allowed in a constant expression; " + "element size %1 * element count %2 is not a multiple of the byte size %3">; def note_constexpr_bit_cast_indet_dest : Note< "indeterminate value can only initialize an object of type 'unsigned char'" "%select{, 'char',|}1 or 'std::byte'; %0 is invalid">; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 5947805f9576ff..f6f71ce6bfe0f5 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -2737,53 +2737,6 @@ static bool truncateBitfieldValue(EvalInfo &Info, const Expr *E, return true; } -static bool 
EvalAndBitcastToAPInt(EvalInfo &Info, const Expr *E, - llvm::APInt &Res) { - APValue SVal; - if (!Evaluate(SVal, Info, E)) - return false; - if (SVal.isInt()) { - Res = SVal.getInt(); - return true; - } - if (SVal.isFloat()) { - Res = SVal.getFloat().bitcastToAPInt(); - return true; - } - if (SVal.isVector()) { - QualType VecTy = E->getType(); - unsigned VecSize = Info.Ctx.getTypeSize(VecTy); - QualType EltTy = VecTy->castAs()->getElementType(); - unsigned EltSize = Info.Ctx.getTypeSize(EltTy); - bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian(); - Res = llvm::APInt::getZero(VecSize); - for (unsigned i = 0; i < SVal.getVectorLength(); i++) { - APValue &Elt = SVal.getVectorElt(i); - llvm::APInt EltAsInt; - if (Elt.isInt()) { - EltAsInt = Elt.getInt(); - } else if (Elt.isFloat()) { - EltAsInt = Elt.getFloat().bitcastToAPInt(); - } else { - // Don't try to handle vectors of anything other than int or float - // (not sure if it's possible to hit this case). - Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); - return false; - } - unsigned BaseEltSize = EltAsInt.getBitWidth(); - if (BigEndian) - Res |= EltAsInt.zextOrTrunc(VecSize).rotr(i*EltSize+BaseEltSize); - else - Res |= EltAsInt.zextOrTrunc(VecSize).rotl(i*EltSize); - } - return true; - } - // Give up if the input isn't an int, float, or vector. For example, we - // reject "(v4i16)(intptr_t)&a". - Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); - return false; -} - /// Perform the given integer operation, which is known to need at most BitWidth /// bits, and check for overflow in the original type (if that type was not an /// unsigned type). @@ -7023,10 +6976,11 @@ class APValueToBufferConverter { return visitArray(Val, Ty, Offset); case APValue::Struct: return visitRecord(Val, Ty, Offset); + case APValue::Vector: + return visitVector(Val, Ty, Offset); case APValue::ComplexInt: case APValue::ComplexFloat: - case APValue::Vector: case APValue::FixedPoint: // FIXME: We should support these. @@ -7113,6 +7067,72 @@ class APValueToBufferConverter { return true; } + bool visitVector(const APValue &Val, QualType Ty, CharUnits Offset) { + const VectorType *VTy = Ty->castAs(); + QualType EltTy = VTy->getElementType(); + unsigned NElts = VTy->getNumElements(); + unsigned EltSize = + VTy->isExtVectorBoolType() ? 1 : Info.Ctx.getTypeSize(EltTy); + + if ((NElts * EltSize) % Info.Ctx.getCharWidth() != 0) { + // The vector's size in bits is not a multiple of the target's byte size, + // so its layout is unspecified. For now, we'll simply treat these cases + // as unsupported (this should only be possible with OpenCL bool vectors + // whose element count isn't a multiple of the byte size). + Info.FFDiag(BCE->getBeginLoc(), + diag::note_constexpr_bit_cast_invalid_vector) + << Ty.getCanonicalType() << EltSize << NElts + << Info.Ctx.getCharWidth(); + return false; + } + + if (EltTy->isRealFloatingType() && &Info.Ctx.getFloatTypeSemantics(EltTy) == + &APFloat::x87DoubleExtended()) { + // The layout for x86_fp80 vectors seems to be handled very inconsistently + // by both clang and LLVM, so for now we won't allow bit_casts involving + // it in a constexpr context. 
+ Info.FFDiag(BCE->getBeginLoc(), + diag::note_constexpr_bit_cast_unsupported_type) + << EltTy; + return false; + } + + if (VTy->isExtVectorBoolType()) { + // Special handling for OpenCL bool vectors: + // Since these vectors are stored as packed bits, but we can't write + // individual bits to the BitCastBuffer, we'll buffer all of the elements + // together into an appropriately sized APInt and write them all out at + // once. Because we don't accept vectors where NElts * EltSize isn't a + // multiple of the char size, there will be no padding space, so we don't + // have to worry about writing data which should have been left + // uninitialized. + bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian(); + + llvm::APInt Res = llvm::APInt::getZero(NElts); + for (unsigned I = 0; I < NElts; ++I) { + const llvm::APSInt &EltAsInt = Val.getVectorElt(I).getInt(); + assert(EltAsInt.isUnsigned() && EltAsInt.getBitWidth() == 1 && + "bool vector element must be 1-bit unsigned integer!"); + + Res.insertBits(EltAsInt, BigEndian ? (NElts - I - 1) : I); + } + + SmallVector Bytes(NElts / 8); + llvm::StoreIntToMemory(Res, &*Bytes.begin(), NElts / 8); + Buffer.writeObject(Offset, Bytes); + } else { + // Iterate over each of the elements and write them out to the buffer at + // the appropriate offset. + CharUnits EltSizeChars = Info.Ctx.getTypeSizeInChars(EltTy); + for (unsigned I = 0; I < NElts; ++I) { + if (!visit(Val.getVectorElt(I), EltTy, Offset + I * EltSizeChars)) + return false; + } + } + + return true; + } + bool visitInt(const APSInt &Val, QualType Ty, CharUnits Offset) { APSInt AdjustedVal = Val; unsigned Width = AdjustedVal.getBitWidth(); @@ -7121,7 +7141,7 @@ class APValueToBufferConverter { AdjustedVal = AdjustedVal.extend(Width); } - SmallVector Bytes(Width / 8); + SmallVector Bytes(Width / 8); llvm::StoreIntToMemory(AdjustedVal, &*Bytes.begin(), Width / 8); Buffer.writeObject(Offset, Bytes); return true; @@ -7322,6 +7342,77 @@ class BufferToAPValueConverter { return ArrayValue; } + std::optional visit(const VectorType *VTy, CharUnits Offset) { + QualType EltTy = VTy->getElementType(); + unsigned NElts = VTy->getNumElements(); + unsigned EltSize = + VTy->isExtVectorBoolType() ? 1 : Info.Ctx.getTypeSize(EltTy); + + if ((NElts * EltSize) % Info.Ctx.getCharWidth() != 0) { + // The vector's size in bits is not a multiple of the target's byte size, + // so its layout is unspecified. For now, we'll simply treat these cases + // as unsupported (this should only be possible with OpenCL bool vectors + // whose element count isn't a multiple of the byte size). + Info.FFDiag(BCE->getBeginLoc(), + diag::note_constexpr_bit_cast_invalid_vector) + << QualType(VTy, 0) << EltSize << NElts << Info.Ctx.getCharWidth(); + return std::nullopt; + } + + if (EltTy->isRealFloatingType() && &Info.Ctx.getFloatTypeSemantics(EltTy) == + &APFloat::x87DoubleExtended()) { + // The layout for x86_fp80 vectors seems to be handled very inconsistently + // by both clang and LLVM, so for now we won't allow bit_casts involving + // it in a constexpr context. 
+ Info.FFDiag(BCE->getBeginLoc(), + diag::note_constexpr_bit_cast_unsupported_type) + << EltTy; + return std::nullopt; + } + + SmallVector Elts; + Elts.reserve(NElts); + if (VTy->isExtVectorBoolType()) { + // Special handling for OpenCL bool vectors: + // Since these vectors are stored as packed bits, but we can't read + // individual bits from the BitCastBuffer, we'll buffer all of the + // elements together into an appropriately sized APInt and write them all + // out at once. Because we don't accept vectors where NElts * EltSize + // isn't a multiple of the char size, there will be no padding space, so + // we don't have to worry about reading any padding data which didn't + // actually need to be accessed. + bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian(); + + SmallVector Bytes; + Bytes.reserve(NElts / 8); + if (!Buffer.readObject(Offset, CharUnits::fromQuantity(NElts / 8), Bytes)) + return std::nullopt; + + APSInt SValInt(NElts, true); + llvm::LoadIntFromMemory(SValInt, &*Bytes.begin(), Bytes.size()); + + for (unsigned I = 0; I < NElts; ++I) { + llvm::APInt Elt = + SValInt.extractBits(1, (BigEndian ? NElts - I - 1 : I) * EltSize); + Elts.emplace_back( + APSInt(std::move(Elt), !EltTy->isSignedIntegerType())); + } + } else { + // Iterate over each of the elements and read them from the buffer at + // the appropriate offset. + CharUnits EltSizeChars = Info.Ctx.getTypeSizeInChars(EltTy); + for (unsigned I = 0; I < NElts; ++I) { + std::optional EltValue = + visitType(EltTy, Offset + I * EltSizeChars); + if (!EltValue) + return std::nullopt; + Elts.push_back(std::move(*EltValue)); + } + } + + return APValue(Elts.data(), Elts.size()); + } + std::optional visit(const Type *Ty, CharUnits Offset) { return unsupportedType(QualType(Ty, 0)); } @@ -7421,25 +7512,15 @@ static bool checkBitCastConstexprEligibility(EvalInfo *Info, return SourceOK; } -static bool handleLValueToRValueBitCast(EvalInfo &Info, APValue &DestValue, - APValue &SourceValue, +static bool handleRValueToRValueBitCast(EvalInfo &Info, APValue &DestValue, + const APValue &SourceRValue, const CastExpr *BCE) { assert(CHAR_BIT == 8 && Info.Ctx.getTargetInfo().getCharWidth() == 8 && "no host or target supports non 8-bit chars"); - assert(SourceValue.isLValue() && - "LValueToRValueBitcast requires an lvalue operand!"); if (!checkBitCastConstexprEligibility(&Info, Info.Ctx, BCE)) return false; - LValue SourceLValue; - APValue SourceRValue; - SourceLValue.setFrom(Info.Ctx, SourceValue); - if (!handleLValueToRValueConversion( - Info, BCE, BCE->getSubExpr()->getType().withConst(), SourceLValue, - SourceRValue, /*WantObjectRepresentation=*/true)) - return false; - // Read out SourceValue into a char buffer. 
std::optional Buffer = APValueToBufferConverter::convert(Info, SourceRValue, BCE); @@ -7456,6 +7537,25 @@ static bool handleLValueToRValueBitCast(EvalInfo &Info, APValue &DestValue, return true; } +static bool handleLValueToRValueBitCast(EvalInfo &Info, APValue &DestValue, + APValue &SourceValue, + const CastExpr *BCE) { + assert(CHAR_BIT == 8 && Info.Ctx.getTargetInfo().getCharWidth() == 8 && + "no host or target supports non 8-bit chars"); + assert(SourceValue.isLValue() && + "LValueToRValueBitcast requires an lvalue operand!"); + + LValue SourceLValue; + APValue SourceRValue; + SourceLValue.setFrom(Info.Ctx, SourceValue); + if (!handleLValueToRValueConversion( + Info, BCE, BCE->getSubExpr()->getType().withConst(), SourceLValue, + SourceRValue, /*WantObjectRepresentation=*/true)) + return false; + + return handleRValueToRValueBitCast(Info, DestValue, SourceRValue, BCE); +} + template class ExprEvaluatorBase : public ConstStmtVisitor { @@ -10540,41 +10640,22 @@ bool VectorExprEvaluator::VisitCastExpr(const CastExpr *E) { return Success(Elts, E); } case CK_BitCast: { - // Evaluate the operand into an APInt we can extract from. - llvm::APInt SValInt; - if (!EvalAndBitcastToAPInt(Info, SE, SValInt)) + APValue SVal; + if (!Evaluate(SVal, Info, SE)) + return false; + + if (!SVal.isInt() && !SVal.isFloat() && !SVal.isVector()) { + // Give up if the input isn't an int, float, or vector. For example, we + // reject "(v4i16)(intptr_t)&a". + Info.FFDiag(E, diag::note_constexpr_invalid_cast) + << 2 << Info.Ctx.getLangOpts().CPlusPlus; return false; - // Extract the elements - QualType EltTy = VTy->getElementType(); - unsigned EltSize = Info.Ctx.getTypeSize(EltTy); - bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian(); - SmallVector Elts; - if (EltTy->isRealFloatingType()) { - const llvm::fltSemantics &Sem = Info.Ctx.getFloatTypeSemantics(EltTy); - unsigned FloatEltSize = EltSize; - if (&Sem == &APFloat::x87DoubleExtended()) - FloatEltSize = 80; - for (unsigned i = 0; i < NElts; i++) { - llvm::APInt Elt; - if (BigEndian) - Elt = SValInt.rotl(i * EltSize + FloatEltSize).trunc(FloatEltSize); - else - Elt = SValInt.rotr(i * EltSize).trunc(FloatEltSize); - Elts.push_back(APValue(APFloat(Sem, Elt))); - } - } else if (EltTy->isIntegerType()) { - for (unsigned i = 0; i < NElts; i++) { - llvm::APInt Elt; - if (BigEndian) - Elt = SValInt.rotl(i*EltSize+EltSize).zextOrTrunc(EltSize); - else - Elt = SValInt.rotr(i*EltSize).zextOrTrunc(EltSize); - Elts.push_back(APValue(APSInt(Elt, !EltTy->isSignedIntegerType()))); - } - } else { - return Error(E); } - return Success(Elts, E); + + if (!handleRValueToRValueBitCast(Info, Result, SVal, E)) + return false; + + return true; } default: return ExprEvaluatorBaseTy::VisitCastExpr(E); diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 3f508032e30d65..c46f38d651972b 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -2152,6 +2152,9 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value, Inits[I] = llvm::ConstantInt::get(CGM.getLLVMContext(), Elt.getInt()); else if (Elt.isFloat()) Inits[I] = llvm::ConstantFP::get(CGM.getLLVMContext(), Elt.getFloat()); + else if (Elt.isIndeterminate()) + Inits[I] = llvm::UndefValue::get(CGM.getTypes().ConvertType( + DestType->castAs()->getElementType())); else llvm_unreachable("unsupported vector element type"); } diff --git a/clang/test/CodeGen/const-init.c b/clang/test/CodeGen/const-init.c index 7062393c450226..0e4fc4ad48af8d 
100644 --- a/clang/test/CodeGen/const-init.c +++ b/clang/test/CodeGen/const-init.c @@ -140,11 +140,12 @@ void g28(void) { typedef short v12i16 __attribute((vector_size(24))); typedef long double v2f80 __attribute((vector_size(24))); // CHECK: @g28.a = internal global <1 x i64> - // CHECK: @g28.b = internal global <12 x i16> - // CHECK: @g28.c = internal global <2 x x86_fp80> , align 32 + // @g28.b = internal global <12 x i16> + // @g28.c = internal global <2 x x86_fp80> , align 32 static v1i64 a = (v1i64)10LL; - static v12i16 b = (v12i16)(v2f80){1,2}; - static v2f80 c = (v2f80)(v12i16){0,0,0,-32768,16383,0,0,0,0,-32768,16384,0}; + //FIXME: support constant bitcast between vectors of x86_fp80 + //static v12i16 b = (v12i16)(v2f80){1,2}; + //static v2f80 c = (v2f80)(v12i16){0,0,0,-32768,16383,0,0,0,0,-32768,16384,0}; } // PR13643 diff --git a/clang/test/SemaCXX/constexpr-builtin-bit-cast-fp80.cpp b/clang/test/SemaCXX/constexpr-builtin-bit-cast-fp80.cpp new file mode 100644 index 00000000000000..b37b362c81e75e --- /dev/null +++ b/clang/test/SemaCXX/constexpr-builtin-bit-cast-fp80.cpp @@ -0,0 +1,66 @@ +// RUN: %clang_cc1 -verify -std=c++2a -fsyntax-only -triple i386-pc-linux-gnu %s + +// This is separate from constexpr-builtin-bit-cast.cpp because we want to +// compile for i386 so that sizeof(long double) is 12. + +typedef long double fp80x2_v __attribute__((ext_vector_type(2))); + +static_assert(sizeof(long double) == 12, ""); +static_assert(sizeof(fp80x2_v) == 32, ""); + +struct fp80x2_s { + char _data[2 * 10]; + unsigned char _pad[sizeof(fp80x2_v) - 2 * 10]; + + constexpr bool operator==(const fp80x2_s& rhs) const { + for (int i = 0; i < 2 * 10; ++i) + if (_data[i] != rhs._data[i]) + return false; + return true; + } +}; + +namespace builtin_bit_cast { + constexpr static fp80x2_v test_vec_fp80 = { 1, 2 }; + constexpr static fp80x2_s test_str_fp80 = { { 0, 0, 0, 0, 0, 0, 0, -128, -1, 63, 0, 0, 0, 0, 0, 0, 0, -128, 0, 64 }, {} }; + + // expected-error@+2 {{static assertion expression is not an integral constant expression}} + // expected-note@+1 {{constexpr bit cast involving type 'long double' is not yet supported}} + static_assert(__builtin_bit_cast(fp80x2_s, test_vec_fp80) == test_str_fp80, ""); + + // expected-error@+2 {{static assertion expression is not an integral constant expression}} + // expected-note@+1 {{constexpr bit cast involving type 'long double' is not yet supported}} + static_assert(__builtin_bit_cast(fp80x2_s, __builtin_bit_cast(fp80x2_v, test_str_fp80)) == test_str_fp80, ""); + + // expected-error@+2 {{constexpr variable 'bad_str_fp80_0' must be initialized by a constant expression}} + // expected-note@+1 {{constexpr bit cast involving type 'long double' is not yet supported}} + constexpr static char bad_str_fp80_0 = __builtin_bit_cast(fp80x2_s, test_vec_fp80)._pad[0]; + + // expected-error@+2 {{constexpr variable 'bad_str_fp80_1' must be initialized by a constant expression}} + // expected-note@+1 {{constexpr bit cast involving type 'long double' is not yet supported}} + constexpr static char bad_str_fp80_1 = __builtin_bit_cast(fp80x2_s, test_vec_fp80)._pad[1]; + + // expected-error@+2 {{constexpr variable 'bad_str_fp80_11' must be initialized by a constant expression}} + // expected-note@+1 {{constexpr bit cast involving type 'long double' is not yet supported}} + constexpr static char bad_str_fp80_11 = __builtin_bit_cast(fp80x2_s, test_vec_fp80)._pad[11]; + + // expected-error@+2 {{constexpr variable 'struct2v' must be initialized by a constant expression}} + // 
expected-note@+1 {{constexpr bit cast involving type 'long double' is not yet supported}} + constexpr static fp80x2_v struct2v = __builtin_bit_cast(fp80x2_v, test_str_fp80); +} + +namespace c_cast { + typedef short v12i16 __attribute((vector_size(24))); + typedef long double v2f80 __attribute((vector_size(24))); + + // FIXME: re-enable the corresponding test cases in CodeGen/const-init.c when + // constexpr bitcast with x86_fp80 is supported + + // expected-error@+2 {{constexpr variable 'b' must be initialized by a constant expression}} + // expected-note@+1 {{constexpr bit cast involving type 'long double' is not yet supported}} + constexpr static v12i16 b = (v12i16)(v2f80){1,2}; + + // expected-error@+2 {{constexpr variable 'c' must be initialized by a constant expression}} + // expected-note@+1 {{constexpr bit cast involving type 'long double' is not yet supported}} + constexpr static v2f80 c = (v2f80)(v12i16){0,0,0,-32768,16383,0,0,0,0,-32768,16384,0}; +} diff --git a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp index a6ebe0572d063b..c5b8032f40b131 100644 --- a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp +++ b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp @@ -463,3 +463,47 @@ static_assert(bit_cast(ld539) == fivehundredandthirtynine, ""); static_assert(round_trip<__int128_t>(34.0L)); #endif } + +namespace test_vector { + +typedef unsigned uint2 __attribute__((vector_size(2 * sizeof(unsigned)))); +typedef char byte8 __attribute__((vector_size(sizeof(unsigned long long)))); + +constexpr uint2 test_vector = { 0x0C05FEFE, 0xCAFEBABE }; + +static_assert(bit_cast(test_vector) == (LITTLE_END + ? 0xCAFEBABE0C05FEFE + : 0x0C05FEFECAFEBABE), ""); + +static_assert(round_trip(0xCAFEBABE0C05FEFEULL), ""); +static_assert(round_trip(0xCAFEBABE0C05FEFEULL), ""); + +typedef bool bool8 __attribute__((ext_vector_type(8))); +typedef bool bool9 __attribute__((ext_vector_type(9))); +typedef bool bool16 __attribute__((ext_vector_type(16))); +typedef bool bool17 __attribute__((ext_vector_type(17))); +typedef bool bool32 __attribute__((ext_vector_type(32))); +typedef bool bool128 __attribute__((ext_vector_type(128))); + +static_assert(bit_cast(bool8{1,0,1,0,1,0,1,0}) == (LITTLE_END ? 0x55 : 0xAA), ""); +static_assert(round_trip(static_cast(0)), ""); +static_assert(round_trip(static_cast(1)), ""); +static_assert(round_trip(static_cast(0x55)), ""); + +static_assert(bit_cast(bool16{1,1,1,1,1,0,0,0, 1,1,1,1,0,1,0,0}) == (LITTLE_END ? 
0x2F1F : 0xF8F4), ""); + +static_assert(round_trip(static_cast(0xCAFE)), ""); +static_assert(round_trip(static_cast(0xCAFEBABE)), ""); +static_assert(round_trip(static_cast<__int128_t>(0xCAFEBABE0C05FEFEULL)), ""); + +// expected-error@+2 {{constexpr variable 'bad_bool9_to_short' must be initialized by a constant expression}} +// expected-note@+1 {{bit_cast involving type 'bool __attribute__((ext_vector_type(9)))' (vector of 9 'bool' values) is not allowed in a constant expression; element size 1 * element count 9 is not a multiple of the byte size 8}} +constexpr unsigned short bad_bool9_to_short = __builtin_bit_cast(unsigned short, bool9{1,1,0,1,0,1,0,1,0}); +// expected-error@+2 {{constexpr variable 'bad_short_to_bool9' must be initialized by a constant expression}} +// expected-note@+1 {{bit_cast involving type 'bool __attribute__((ext_vector_type(9)))' (vector of 9 'bool' values) is not allowed in a constant expression; element size 1 * element count 9 is not a multiple of the byte size 8}} +constexpr bool9 bad_short_to_bool9 = __builtin_bit_cast(bool9, static_cast(0)); +// expected-error@+2 {{constexpr variable 'bad_int_to_bool17' must be initialized by a constant expression}} +// expected-note@+1 {{bit_cast involving type 'bool __attribute__((ext_vector_type(17)))' (vector of 17 'bool' values) is not allowed in a constant expression; element size 1 * element count 17 is not a multiple of the byte size 8}} +constexpr bool17 bad_int_to_bool17 = __builtin_bit_cast(bool17, 0x0001CAFEU); + +} From 1de5fe18d8e3935a1a0860068d4e23b7bb7e83b0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 30 Oct 2023 17:38:56 +0000 Subject: [PATCH 056/144] [MCA][X86] Add AVX512 FMA instruction test coverage --- .../llvm-mca/X86/Generic/resources-avx512.s | 170 ++++++++- .../llvm-mca/X86/Generic/resources-avx512vl.s | 338 +++++++++++++++++- .../X86/IceLakeServer/resources-avx512.s | 170 ++++++++- .../X86/IceLakeServer/resources-avx512vl.s | 338 +++++++++++++++++- .../X86/SapphireRapids/resources-avx512.s | 170 ++++++++- .../X86/SapphireRapids/resources-avx512vl.s | 338 +++++++++++++++++- .../X86/SkylakeServer/resources-avx512.s | 170 ++++++++- .../X86/SkylakeServer/resources-avx512vl.s | 338 +++++++++++++++++- .../llvm-mca/X86/Znver4/resources-avx512.s | 170 ++++++++- .../llvm-mca/X86/Znver4/resources-avx512vl.s | 338 +++++++++++++++++- 10 files changed, 2530 insertions(+), 10 deletions(-) diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s index 5b9b8cd6bb5fc6..a8937f7dcfd117 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s @@ -298,6 +298,66 @@ vdivps %zmm16, %zmm17, %zmm19 {z}{k1} vdivps (%rax), %zmm17, %zmm19 {z}{k1} vdivps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vfmadd132pd %zmm16, %zmm17, %zmm19 +vfmadd132pd (%rax), %zmm17, %zmm19 +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd132pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd132pd (%rax), %zmm17, %zmm19 {k1} +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd132pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd132pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd213pd %zmm16, %zmm17, %zmm19 +vfmadd213pd (%rax), %zmm17, %zmm19 +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd213pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd213pd (%rax), %zmm17, %zmm19 {k1} +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd213pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd213pd (%rax), 
%zmm17, %zmm19 {z}{k1} +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd231pd %zmm16, %zmm17, %zmm19 +vfmadd231pd (%rax), %zmm17, %zmm19 +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd231pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd231pd (%rax), %zmm17, %zmm19 {k1} +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd231pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd231pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd132ps %zmm16, %zmm17, %zmm19 +vfmadd132ps (%rax), %zmm17, %zmm19 +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd132ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd132ps (%rax), %zmm17, %zmm19 {k1} +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd132ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd132ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + +vfmadd213ps %zmm16, %zmm17, %zmm19 +vfmadd213ps (%rax), %zmm17, %zmm19 +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd213ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd213ps (%rax), %zmm17, %zmm19 {k1} +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd213ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd213ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + +vfmadd231ps %zmm16, %zmm17, %zmm19 +vfmadd231ps (%rax), %zmm17, %zmm19 +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd231ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd231ps (%rax), %zmm17, %zmm19 {k1} +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd231ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd231ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + vgatherdpd (%rax,%ymm1,2), %zmm2 {k1} vgatherdps (%rax,%zmm1,2), %zmm2 {k1} vgatherqpd (%rax,%zmm1,2), %zmm2 {k1} @@ -1274,6 +1334,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 3 29 28.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 36 28.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 36 28.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 5 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd213pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 5 0.50 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd231pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 5 0.50 vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %zmm17, %zmm19 
{%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd132ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 5 0.50 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd213ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 5 0.50 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd231ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 5 0.50 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1 5 0.50 * vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: 1 5 0.50 * vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 1 5 0.50 * vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} @@ -1913,7 +2027,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1506.00 170.00 307.00 16.00 522.00 281.50 281.50 +# CHECK-NEXT: - 1506.00 197.00 334.00 16.00 522.00 299.50 299.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2176,6 +2290,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - 28.00 2.50 - - 0.50 - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - 28.00 2.50 - - 0.50 0.50 0.50 vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - 28.00 2.50 - - 0.50 0.50 0.50 vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax){1to8}, %zmm17, 
%zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - 
vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - 0.50 0.50 vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: - - - - - - 0.50 0.50 vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: - - - - - - 0.50 0.50 vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s index 84852a2a8b1563..e8e7a80f690bfa 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s @@ -418,6 +418,126 @@ vdivps %ymm16, %ymm17, %ymm19 {z}{k1} vdivps (%rax), %ymm17, %ymm19 {z}{k1} vdivps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} +vfmadd132pd %xmm16, %xmm17, %xmm19 +vfmadd132pd (%rax), %xmm17, %xmm19 +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd132pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd132pd (%rax), %xmm17, %xmm19 {k1} +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd132pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd132pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd132pd %ymm16, %ymm17, %ymm19 +vfmadd132pd (%rax), %ymm17, %ymm19 +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd132pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd132pd (%rax), %ymm17, %ymm19 {k1} +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd132pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd132pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd213pd %xmm16, %xmm17, %xmm19 +vfmadd213pd (%rax), %xmm17, %xmm19 +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd213pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd213pd (%rax), %xmm17, %xmm19 {k1} +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd213pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd213pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd213pd %ymm16, %ymm17, %ymm19 +vfmadd213pd (%rax), %ymm17, %ymm19 +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd213pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd213pd (%rax), %ymm17, %ymm19 {k1} +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd213pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd213pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd231pd %xmm16, %xmm17, %xmm19 +vfmadd231pd (%rax), %xmm17, %xmm19 +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd231pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd231pd (%rax), %xmm17, %xmm19 {k1} +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd231pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd231pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd231pd %ymm16, %ymm17, %ymm19 +vfmadd231pd (%rax), %ymm17, %ymm19 +vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd231pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd231pd (%rax), %ymm17, %ymm19 {k1} +vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd231pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd231pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd132ps %xmm16, %xmm17, %xmm19 +vfmadd132ps (%rax), %xmm17, %xmm19 +vfmadd132ps (%rax){1to4}, 
%xmm17, %xmm19 +vfmadd132ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd132ps (%rax), %xmm17, %xmm19 {k1} +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd132ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd132ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd132ps %ymm16, %ymm17, %ymm19 +vfmadd132ps (%rax), %ymm17, %ymm19 +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd132ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd132ps (%rax), %ymm17, %ymm19 {k1} +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd132ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd132ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + +vfmadd213ps %xmm16, %xmm17, %xmm19 +vfmadd213ps (%rax), %xmm17, %xmm19 +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd213ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd213ps (%rax), %xmm17, %xmm19 {k1} +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd213ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd213ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd213ps %ymm16, %ymm17, %ymm19 +vfmadd213ps (%rax), %ymm17, %ymm19 +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd213ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd213ps (%rax), %ymm17, %ymm19 {k1} +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd213ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd213ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + +vfmadd231ps %xmm16, %xmm17, %xmm19 +vfmadd231ps (%rax), %xmm17, %xmm19 +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd231ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd231ps (%rax), %xmm17, %xmm19 {k1} +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd231ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd231ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd231ps %ymm16, %ymm17, %ymm19 +vfmadd231ps (%rax), %ymm17, %ymm19 +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd231ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd231ps (%rax), %ymm17, %ymm19 {k1} +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd231ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd231ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + vgatherdpd (%rax,%xmm1,2), %ymm2 {k1} vgatherdps (%rax,%ymm1,2), %ymm2 {k1} vgatherqpd (%rax,%ymm1,2), %ymm2 {k1} @@ -1961,6 +2081,114 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 3 29 28.00 vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 4 36 28.00 * vdivps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 4 36 28.00 * vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 5 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 5 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * 
vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 5 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 5 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 5 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 5 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 5 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 5 0.50 
vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 5 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 5 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 5 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 5 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 5 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 5 0.50 * vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} # CHECK-NEXT: 1 5 0.50 * vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} # CHECK-NEXT: 1 5 0.50 * vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} @@ -3000,7 +3228,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1935.00 224.00 525.50 32.00 738.50 450.50 450.50 +# CHECK-NEXT: - 1935.00 278.00 579.50 32.00 738.50 486.50 486.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -3376,6 +3604,114 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - 28.00 2.50 - 
- 0.50 - - vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 28.00 2.50 - - 0.50 0.50 0.50 vdivps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 28.00 2.50 - - 0.50 0.50 0.50 vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} +# 
CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 
vfmadd213ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - 0.50 0.50 vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} # CHECK-NEXT: - - - - - - 0.50 0.50 vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} # CHECK-NEXT: - - - - - - 0.50 0.50 vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s index 40ab0656c48889..d99213f0b25a66 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s @@ -298,6 +298,66 @@ vdivps %zmm16, %zmm17, %zmm19 {z}{k1} vdivps (%rax), %zmm17, %zmm19 {z}{k1} vdivps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vfmadd132pd %zmm16, %zmm17, %zmm19 +vfmadd132pd (%rax), %zmm17, %zmm19 +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd132pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd132pd (%rax), %zmm17, %zmm19 {k1} +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd132pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd132pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd213pd %zmm16, %zmm17, %zmm19 +vfmadd213pd (%rax), %zmm17, %zmm19 +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd213pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd213pd (%rax), %zmm17, %zmm19 {k1} +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd213pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd213pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd213pd 
(%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd231pd %zmm16, %zmm17, %zmm19 +vfmadd231pd (%rax), %zmm17, %zmm19 +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd231pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd231pd (%rax), %zmm17, %zmm19 {k1} +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd231pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd231pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd132ps %zmm16, %zmm17, %zmm19 +vfmadd132ps (%rax), %zmm17, %zmm19 +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd132ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd132ps (%rax), %zmm17, %zmm19 {k1} +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd132ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd132ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + +vfmadd213ps %zmm16, %zmm17, %zmm19 +vfmadd213ps (%rax), %zmm17, %zmm19 +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd213ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd213ps (%rax), %zmm17, %zmm19 {k1} +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd213ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd213ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + +vfmadd231ps %zmm16, %zmm17, %zmm19 +vfmadd231ps (%rax), %zmm17, %zmm19 +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd231ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd231ps (%rax), %zmm17, %zmm19 {k1} +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd231ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd231ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + vgatherdpd (%rax,%ymm1,2), %zmm2 {k1} vgatherdps (%rax,%zmm1,2), %zmm2 {k1} vgatherqpd (%rax,%zmm1,2), %zmm2 {k1} @@ -1274,6 +1334,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 3 18 10.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 25 10.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 25 10.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * 
vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 5 21 4.00 * vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: 5 25 8.00 * vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 5 21 4.00 * vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} @@ -1917,7 +2031,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 612.00 317.17 99.67 309.50 309.50 8.00 612.17 2.00 8.00 8.00 8.00 +# CHECK-NEXT: - 612.00 344.17 99.67 327.50 327.50 8.00 639.17 2.00 8.00 8.00 8.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2180,6 +2294,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - 10.00 2.00 - - - - 1.00 - - - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - - - vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - - - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132pd (%rax), %zmm17, 
%zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - 
vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - - - vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 8.00 8.00 - 0.58 0.25 - - - vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - - - vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s index 6d11293789a60d..375087ae0cfe4e 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s @@ -418,6 +418,126 @@ vdivps %ymm16, %ymm17, %ymm19 {z}{k1} vdivps (%rax), %ymm17, %ymm19 {z}{k1} vdivps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} +vfmadd132pd %xmm16, %xmm17, %xmm19 +vfmadd132pd (%rax), %xmm17, %xmm19 +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd132pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd132pd (%rax), %xmm17, %xmm19 {k1} +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd132pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd132pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd132pd %ymm16, %ymm17, %ymm19 +vfmadd132pd (%rax), %ymm17, %ymm19 +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd132pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd132pd (%rax), %ymm17, %ymm19 {k1} +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd132pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd132pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd213pd %xmm16, %xmm17, %xmm19 +vfmadd213pd (%rax), %xmm17, %xmm19 +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd213pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd213pd (%rax), %xmm17, %xmm19 {k1} +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd213pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd213pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd213pd %ymm16, %ymm17, %ymm19 +vfmadd213pd (%rax), %ymm17, %ymm19 +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd213pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd213pd (%rax), %ymm17, %ymm19 {k1} +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd213pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd213pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd231pd %xmm16, %xmm17, %xmm19 +vfmadd231pd (%rax), %xmm17, %xmm19 +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd231pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd231pd (%rax), %xmm17, %xmm19 {k1} +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd231pd %xmm16, %xmm17, %xmm19 {z}{k1} 
+vfmadd231pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd231pd %ymm16, %ymm17, %ymm19 +vfmadd231pd (%rax), %ymm17, %ymm19 +vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd231pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd231pd (%rax), %ymm17, %ymm19 {k1} +vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd231pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd231pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd132ps %xmm16, %xmm17, %xmm19 +vfmadd132ps (%rax), %xmm17, %xmm19 +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd132ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd132ps (%rax), %xmm17, %xmm19 {k1} +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd132ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd132ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd132ps %ymm16, %ymm17, %ymm19 +vfmadd132ps (%rax), %ymm17, %ymm19 +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd132ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd132ps (%rax), %ymm17, %ymm19 {k1} +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd132ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd132ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + +vfmadd213ps %xmm16, %xmm17, %xmm19 +vfmadd213ps (%rax), %xmm17, %xmm19 +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd213ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd213ps (%rax), %xmm17, %xmm19 {k1} +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd213ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd213ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd213ps %ymm16, %ymm17, %ymm19 +vfmadd213ps (%rax), %ymm17, %ymm19 +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd213ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd213ps (%rax), %ymm17, %ymm19 {k1} +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd213ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd213ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + +vfmadd231ps %xmm16, %xmm17, %xmm19 +vfmadd231ps (%rax), %xmm17, %xmm19 +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd231ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd231ps (%rax), %xmm17, %xmm19 {k1} +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd231ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd231ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd231ps %ymm16, %ymm17, %ymm19 +vfmadd231ps (%rax), %ymm17, %ymm19 +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd231ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd231ps (%rax), %ymm17, %ymm19 {k1} +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd231ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd231ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + vgatherdpd (%rax,%xmm1,2), %ymm2 {k1} vgatherdps (%rax,%ymm1,2), %ymm2 {k1} vgatherqpd (%rax,%ymm1,2), %ymm2 {k1} @@ -1961,6 +2081,114 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 11 5.00 vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 18 5.00 * vdivps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 18 5.00 * vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 
{%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} 
+# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 5 19 2.00 * vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} # CHECK-NEXT: 5 21 4.00 * vgatherdps (%rax,%ymm1,2), 
%ymm2 {%k1} # CHECK-NEXT: 5 19 2.00 * vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} @@ -3004,7 +3232,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 423.00 384.33 359.33 456.50 456.50 16.00 722.33 4.00 16.00 16.00 16.00 +# CHECK-NEXT: - 423.00 438.33 413.33 492.50 492.50 16.00 722.33 4.00 16.00 16.00 16.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -3380,6 +3608,114 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - 5.00 1.00 - - - - - - - - - vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 5.00 1.00 - 0.50 0.50 - - - - - - vdivps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 5.00 1.00 - 0.50 0.50 - - - - - - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 
0.50 0.50 - - - - - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 
0.50 0.50 0.50 - - - - - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} +# 
CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - - - vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - - - vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - - - vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s index 784c851d707e81..b34ccaacc11a32 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s @@ -298,6 +298,66 @@ vdivps %zmm16, %zmm17, %zmm19 {z}{k1} vdivps (%rax), %zmm17, %zmm19 {z}{k1} vdivps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vfmadd132pd %zmm16, %zmm17, %zmm19 +vfmadd132pd (%rax), %zmm17, %zmm19 +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd132pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd132pd (%rax), %zmm17, %zmm19 {k1} +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd132pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd132pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd213pd %zmm16, %zmm17, %zmm19 +vfmadd213pd (%rax), %zmm17, %zmm19 +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd213pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd213pd (%rax), %zmm17, %zmm19 {k1} +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd213pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd213pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd231pd %zmm16, %zmm17, %zmm19 +vfmadd231pd (%rax), %zmm17, %zmm19 +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd231pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd231pd (%rax), %zmm17, %zmm19 {k1} +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd231pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd231pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd132ps %zmm16, %zmm17, %zmm19 +vfmadd132ps (%rax), %zmm17, %zmm19 +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd132ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd132ps (%rax), %zmm17, %zmm19 {k1} +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd132ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd132ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + +vfmadd213ps %zmm16, %zmm17, %zmm19 +vfmadd213ps (%rax), %zmm17, %zmm19 +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd213ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd213ps (%rax), %zmm17, %zmm19 {k1} +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd213ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd213ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + +vfmadd231ps %zmm16, %zmm17, %zmm19 +vfmadd231ps (%rax), %zmm17, %zmm19 +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd231ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd231ps (%rax), %zmm17, %zmm19 {k1} +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd231ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd231ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + vgatherdpd (%rax,%ymm1,2), %zmm2 {k1} vgatherdps (%rax,%zmm1,2), %zmm2 {k1} vgatherqpd (%rax,%zmm1,2), %zmm2 {k1} @@ -1274,6 +1334,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 3 18 2.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 25 2.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 25 2.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# 
CHECK-NEXT: 1 4 1.00 vfmadd132pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 12 1.00 * vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 12 1.00 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 12 1.00 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 12 1.00 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 12 1.00 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 12 1.00 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd213pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 12 1.00 * vfmadd213pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 12 1.00 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 12 1.00 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 12 1.00 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 12 1.00 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 12 1.00 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd231pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 12 1.00 * vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 12 1.00 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 12 1.00 * vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 12 1.00 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 12 1.00 * vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 12 1.00 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd132ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 12 1.00 * vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 12 1.00 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 12 1.00 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 12 1.00 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 12 1.00 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 12 1.00 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd213ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 12 1.00 * vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 12 1.00 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 12 1.00 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 12 1.00 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 12 1.00 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 12 1.00 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd231ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 12 1.00 * vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 12 1.00 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 12 1.00 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 12 1.00 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# 
CHECK-NEXT: 2 12 1.00 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 12 1.00 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 11 28 2.67 * vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: 19 30 5.33 * vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 11 28 2.67 * vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} @@ -1918,7 +2032,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 436.00 12.00 206.33 206.33 8.00 575.00 - 8.00 8.00 8.00 - 206.33 - +# CHECK-NEXT: 490.00 12.00 218.33 218.33 8.00 575.00 - 8.00 8.00 8.00 - 218.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -2181,6 +2295,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %zmm17, %zmm19 
{%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: 1.00 - 5.33 5.33 - 2.00 - - - - - 5.33 - vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s index 2b2d1f3dcc876d..3ad66f1c3d7128 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s @@ -418,6 +418,126 @@ vdivps %ymm16, %ymm17, %ymm19 {z}{k1} vdivps (%rax), %ymm17, %ymm19 {z}{k1} vdivps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} +vfmadd132pd %xmm16, %xmm17, %xmm19 +vfmadd132pd (%rax), %xmm17, %xmm19 +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd132pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd132pd (%rax), %xmm17, 
%xmm19 {k1} +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd132pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd132pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd132pd %ymm16, %ymm17, %ymm19 +vfmadd132pd (%rax), %ymm17, %ymm19 +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd132pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd132pd (%rax), %ymm17, %ymm19 {k1} +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd132pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd132pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd213pd %xmm16, %xmm17, %xmm19 +vfmadd213pd (%rax), %xmm17, %xmm19 +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd213pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd213pd (%rax), %xmm17, %xmm19 {k1} +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd213pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd213pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd213pd %ymm16, %ymm17, %ymm19 +vfmadd213pd (%rax), %ymm17, %ymm19 +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd213pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd213pd (%rax), %ymm17, %ymm19 {k1} +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd213pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd213pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd231pd %xmm16, %xmm17, %xmm19 +vfmadd231pd (%rax), %xmm17, %xmm19 +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd231pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd231pd (%rax), %xmm17, %xmm19 {k1} +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd231pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd231pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd231pd %ymm16, %ymm17, %ymm19 +vfmadd231pd (%rax), %ymm17, %ymm19 +vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd231pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd231pd (%rax), %ymm17, %ymm19 {k1} +vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd231pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd231pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd132ps %xmm16, %xmm17, %xmm19 +vfmadd132ps (%rax), %xmm17, %xmm19 +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd132ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd132ps (%rax), %xmm17, %xmm19 {k1} +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd132ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd132ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd132ps %ymm16, %ymm17, %ymm19 +vfmadd132ps (%rax), %ymm17, %ymm19 +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd132ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd132ps (%rax), %ymm17, %ymm19 {k1} +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd132ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd132ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + +vfmadd213ps %xmm16, %xmm17, %xmm19 +vfmadd213ps (%rax), %xmm17, %xmm19 +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd213ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd213ps (%rax), %xmm17, %xmm19 {k1} +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd213ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd213ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd213ps %ymm16, %ymm17, %ymm19 +vfmadd213ps (%rax), %ymm17, %ymm19 +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd213ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd213ps (%rax), %ymm17, %ymm19 {k1} +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd213ps %ymm16, %ymm17, %ymm19 {z}{k1} 
+vfmadd213ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + +vfmadd231ps %xmm16, %xmm17, %xmm19 +vfmadd231ps (%rax), %xmm17, %xmm19 +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd231ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd231ps (%rax), %xmm17, %xmm19 {k1} +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd231ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd231ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd231ps %ymm16, %ymm17, %ymm19 +vfmadd231ps (%rax), %ymm17, %ymm19 +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd231ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd231ps (%rax), %ymm17, %ymm19 {k1} +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd231ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd231ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + vgatherdpd (%rax,%xmm1,2), %ymm2 {k1} vgatherdps (%rax,%ymm1,2), %ymm2 {k1} vgatherqpd (%rax,%ymm1,2), %ymm2 {k1} @@ -1961,6 +2081,114 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 11 1.00 vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 19 1.00 * vdivps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 19 1.00 * vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 12 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 12 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 12 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 12 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 12 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 12 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 12 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 12 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 12 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 12 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} 
{z} +# CHECK-NEXT: 2 12 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 12 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 12 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 12 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 12 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 12 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 12 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 12 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 12 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 12 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 12 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 12 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 12 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 12 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 12 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 12 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 12 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} +# 
CHECK-NEXT: 2 12 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 12 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 12 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 12 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 12 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 12 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 12 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 12 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 12 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 7 28 1.33 * vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} # CHECK-NEXT: 11 29 2.67 * vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} # CHECK-NEXT: 7 28 1.33 * vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} @@ -3005,7 +3233,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 323.33 347.33 304.33 304.33 16.00 794.33 - 16.00 16.00 16.00 - 304.33 - +# CHECK-NEXT: 377.33 401.33 328.33 328.33 16.00 794.33 - 16.00 16.00 16.00 - 328.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -3381,6 +3609,114 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd 
(%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +# 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 
- - - - - - 0.33 - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} # CHECK-NEXT: 1.00 1.00 2.67 2.67 - 1.00 - - - - - 2.67 - vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} # CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s index c17426a6540509..b1bfd7a9ec448a 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s @@ -298,6 +298,66 @@ vdivps %zmm16, %zmm17, %zmm19 {z}{k1} vdivps (%rax), %zmm17, %zmm19 {z}{k1} vdivps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vfmadd132pd %zmm16, %zmm17, %zmm19 +vfmadd132pd (%rax), %zmm17, %zmm19 +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd132pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd132pd (%rax), %zmm17, %zmm19 {k1} +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd132pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd132pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd213pd %zmm16, %zmm17, %zmm19 +vfmadd213pd (%rax), %zmm17, %zmm19 +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd213pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd213pd (%rax), %zmm17, %zmm19 {k1} +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 
{k1} +vfmadd213pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd213pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd231pd %zmm16, %zmm17, %zmm19 +vfmadd231pd (%rax), %zmm17, %zmm19 +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd231pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd231pd (%rax), %zmm17, %zmm19 {k1} +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd231pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd231pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd132ps %zmm16, %zmm17, %zmm19 +vfmadd132ps (%rax), %zmm17, %zmm19 +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd132ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd132ps (%rax), %zmm17, %zmm19 {k1} +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd132ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd132ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + +vfmadd213ps %zmm16, %zmm17, %zmm19 +vfmadd213ps (%rax), %zmm17, %zmm19 +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd213ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd213ps (%rax), %zmm17, %zmm19 {k1} +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd213ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd213ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + +vfmadd231ps %zmm16, %zmm17, %zmm19 +vfmadd231ps (%rax), %zmm17, %zmm19 +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd231ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd231ps (%rax), %zmm17, %zmm19 {k1} +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd231ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd231ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + vgatherdpd (%rax,%ymm1,2), %zmm2 {k1} vgatherdps (%rax,%zmm1,2), %zmm2 {k1} vgatherqpd (%rax,%zmm1,2), %zmm2 {k1} @@ -1274,6 +1334,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 3 18 10.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 25 10.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 25 10.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %zmm16, %zmm17, %zmm19 
{%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 5 21 4.00 * vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: 5 25 8.00 * vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 5 21 4.00 * vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} @@ -1915,7 +2029,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 612.00 312.67 99.67 314.83 314.83 16.00 616.67 2.00 5.33 +# CHECK-NEXT: - 612.00 339.67 99.67 332.83 332.83 16.00 643.67 2.00 5.33 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2178,6 +2292,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - 10.00 2.00 - - - - 1.00 - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd132pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 
- - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd213pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd213pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd231pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd132ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd213ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 
{%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd231ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 8.00 8.00 - 0.58 0.25 - vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s index d61bc0d9c7e3b3..2ad91ea514aa20 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s @@ -418,6 +418,126 @@ vdivps %ymm16, %ymm17, %ymm19 {z}{k1} vdivps (%rax), %ymm17, %ymm19 {z}{k1} vdivps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} +vfmadd132pd %xmm16, %xmm17, %xmm19 +vfmadd132pd (%rax), %xmm17, %xmm19 +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd132pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd132pd (%rax), %xmm17, %xmm19 {k1} +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd132pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd132pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd132pd %ymm16, %ymm17, %ymm19 +vfmadd132pd (%rax), %ymm17, %ymm19 +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd132pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd132pd (%rax), %ymm17, %ymm19 {k1} +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd132pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd132pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd213pd %xmm16, %xmm17, %xmm19 +vfmadd213pd (%rax), %xmm17, %xmm19 +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd213pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd213pd (%rax), %xmm17, %xmm19 {k1} +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd213pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd213pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd213pd %ymm16, %ymm17, %ymm19 +vfmadd213pd (%rax), %ymm17, %ymm19 +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd213pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd213pd (%rax), %ymm17, %ymm19 {k1} +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd213pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd213pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd231pd %xmm16, %xmm17, %xmm19 +vfmadd231pd (%rax), %xmm17, %xmm19 +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd231pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd231pd (%rax), %xmm17, %xmm19 {k1} +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd231pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd231pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd231pd %ymm16, %ymm17, %ymm19 +vfmadd231pd (%rax), %ymm17, %ymm19 +vfmadd231pd (%rax){1to4}, 
%ymm17, %ymm19 +vfmadd231pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd231pd (%rax), %ymm17, %ymm19 {k1} +vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd231pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd231pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd132ps %xmm16, %xmm17, %xmm19 +vfmadd132ps (%rax), %xmm17, %xmm19 +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd132ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd132ps (%rax), %xmm17, %xmm19 {k1} +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd132ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd132ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd132ps %ymm16, %ymm17, %ymm19 +vfmadd132ps (%rax), %ymm17, %ymm19 +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd132ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd132ps (%rax), %ymm17, %ymm19 {k1} +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd132ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd132ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + +vfmadd213ps %xmm16, %xmm17, %xmm19 +vfmadd213ps (%rax), %xmm17, %xmm19 +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd213ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd213ps (%rax), %xmm17, %xmm19 {k1} +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd213ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd213ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd213ps %ymm16, %ymm17, %ymm19 +vfmadd213ps (%rax), %ymm17, %ymm19 +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd213ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd213ps (%rax), %ymm17, %ymm19 {k1} +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd213ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd213ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + +vfmadd231ps %xmm16, %xmm17, %xmm19 +vfmadd231ps (%rax), %xmm17, %xmm19 +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd231ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd231ps (%rax), %xmm17, %xmm19 {k1} +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd231ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd231ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd231ps %ymm16, %ymm17, %ymm19 +vfmadd231ps (%rax), %ymm17, %ymm19 +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd231ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd231ps (%rax), %ymm17, %ymm19 {k1} +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd231ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd231ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + vgatherdpd (%rax,%xmm1,2), %ymm2 {k1} vgatherdps (%rax,%ymm1,2), %ymm2 {k1} vgatherqpd (%rax,%ymm1,2), %ymm2 {k1} @@ -1961,6 +2081,114 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 11 5.00 vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 18 5.00 * vdivps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 18 5.00 * vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132pd 
(%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm16, 
%xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 5 19 2.00 * vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} # CHECK-NEXT: 5 21 4.00 * vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} # CHECK-NEXT: 5 19 2.00 * vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} @@ -3002,7 +3230,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per 
iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 423.00 384.33 296.33 467.17 467.17 32.00 785.33 4.00 10.67 +# CHECK-NEXT: - 423.00 438.33 350.33 503.17 503.17 32.00 785.33 4.00 10.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -3378,6 +3606,114 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - 5.00 1.00 - - - - - - - vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 5.00 1.00 - 0.50 0.50 - - - - vdivps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 5.00 1.00 - 0.50 0.50 - - - - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 
- - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %xmm17, %xmm19 +# 
CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s index 4fb89645febce6..6742cfccb2d001 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s @@ 
-298,6 +298,66 @@ vdivps %zmm16, %zmm17, %zmm19 {z}{k1} vdivps (%rax), %zmm17, %zmm19 {z}{k1} vdivps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vfmadd132pd %zmm16, %zmm17, %zmm19 +vfmadd132pd (%rax), %zmm17, %zmm19 +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd132pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd132pd (%rax), %zmm17, %zmm19 {k1} +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd132pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd132pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd213pd %zmm16, %zmm17, %zmm19 +vfmadd213pd (%rax), %zmm17, %zmm19 +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd213pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd213pd (%rax), %zmm17, %zmm19 {k1} +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd213pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd213pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd231pd %zmm16, %zmm17, %zmm19 +vfmadd231pd (%rax), %zmm17, %zmm19 +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +vfmadd231pd %zmm16, %zmm17, %zmm19 {k1} +vfmadd231pd (%rax), %zmm17, %zmm19 {k1} +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {k1} +vfmadd231pd %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd231pd (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {z}{k1} + +vfmadd132ps %zmm16, %zmm17, %zmm19 +vfmadd132ps (%rax), %zmm17, %zmm19 +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd132ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd132ps (%rax), %zmm17, %zmm19 {k1} +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd132ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd132ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + +vfmadd213ps %zmm16, %zmm17, %zmm19 +vfmadd213ps (%rax), %zmm17, %zmm19 +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd213ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd213ps (%rax), %zmm17, %zmm19 {k1} +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd213ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd213ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + +vfmadd231ps %zmm16, %zmm17, %zmm19 +vfmadd231ps (%rax), %zmm17, %zmm19 +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +vfmadd231ps %zmm16, %zmm17, %zmm19 {k1} +vfmadd231ps (%rax), %zmm17, %zmm19 {k1} +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {k1} +vfmadd231ps %zmm16, %zmm17, %zmm19 {z}{k1} +vfmadd231ps (%rax), %zmm17, %zmm19 {z}{k1} +vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} + vgatherdpd (%rax,%ymm1,2), %zmm2 {k1} vgatherdps (%rax,%zmm1,2), %zmm2 {k1} vgatherqpd (%rax,%zmm1,2), %zmm2 {k1} @@ -1274,6 +1334,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 11 6.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1 18 6.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1 18 6.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd132pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1 11 1.00 * vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1 11 1.00 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 11 1.00 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 11 1.00 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 11 1.00 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 11 1.00 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd213pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1 11 1.00 * vfmadd213pd (%rax), 
%zmm17, %zmm19 +# CHECK-NEXT: 1 11 1.00 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 11 1.00 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 11 1.00 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 11 1.00 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 11 1.00 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd231pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1 11 1.00 * vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1 11 1.00 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 11 1.00 * vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 11 1.00 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 11 1.00 * vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 11 1.00 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd132ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1 11 1.00 * vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1 11 1.00 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 11 1.00 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 11 1.00 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 11 1.00 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 11 1.00 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd213ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1 11 1.00 * vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1 11 1.00 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 11 1.00 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 11 1.00 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 11 1.00 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 11 1.00 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd231ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 1 11 1.00 * vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1 11 1.00 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 11 1.00 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 11 1.00 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 11 1.00 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 11 1.00 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1 5 0.33 * vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: 1 5 0.33 * vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 1 5 0.33 * vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} @@ -1928,7 +2042,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 2.67 2.67 2.67 - - - - - 167.00 1006.50 618.00 352.50 277.50 277.50 16.00 187.67 187.67 187.67 182.33 182.33 182.33 
8.00 8.00 +# CHECK-NEXT: 2.67 2.67 2.67 - - - - - 221.00 1060.50 618.00 352.50 295.50 295.50 16.00 199.67 199.67 199.67 194.33 194.33 194.33 8.00 8.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -2191,6 +2305,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - - - - - - 6.00 - - - - - - - - - - - - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 6.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 6.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 
0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 
0.33 0.33 - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s index a1fd2056d85426..e2813a564384cd 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s @@ -418,6 +418,126 @@ vdivps %ymm16, %ymm17, %ymm19 {z}{k1} vdivps (%rax), %ymm17, %ymm19 {z}{k1} vdivps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} +vfmadd132pd %xmm16, %xmm17, %xmm19 +vfmadd132pd (%rax), %xmm17, %xmm19 +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd132pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd132pd (%rax), %xmm17, %xmm19 {k1} +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd132pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd132pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd132pd %ymm16, %ymm17, %ymm19 +vfmadd132pd (%rax), %ymm17, %ymm19 +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd132pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd132pd (%rax), %ymm17, %ymm19 {k1} +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd132pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd132pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd213pd %xmm16, %xmm17, %xmm19 +vfmadd213pd (%rax), %xmm17, %xmm19 +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd213pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd213pd (%rax), %xmm17, %xmm19 {k1} +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd213pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd213pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd213pd %ymm16, %ymm17, %ymm19 +vfmadd213pd (%rax), %ymm17, %ymm19 +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd213pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd213pd (%rax), %ymm17, %ymm19 {k1} +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd213pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd213pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {z}{k1} + +vfmadd231pd %xmm16, %xmm17, %xmm19 +vfmadd231pd (%rax), %xmm17, %xmm19 +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +vfmadd231pd %xmm16, %xmm17, %xmm19 {k1} +vfmadd231pd (%rax), %xmm17, %xmm19 {k1} +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {k1} +vfmadd231pd %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd231pd (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {z}{k1} + +vfmadd231pd %ymm16, %ymm17, %ymm19 +vfmadd231pd (%rax), %ymm17, %ymm19 +vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +vfmadd231pd %ymm16, %ymm17, %ymm19 {k1} +vfmadd231pd (%rax), %ymm17, %ymm19 {k1} +vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {k1} +vfmadd231pd %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd231pd (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd231pd (%rax){1to4}, %ymm17, 
%ymm19 {z}{k1} + +vfmadd132ps %xmm16, %xmm17, %xmm19 +vfmadd132ps (%rax), %xmm17, %xmm19 +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd132ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd132ps (%rax), %xmm17, %xmm19 {k1} +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd132ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd132ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd132ps %ymm16, %ymm17, %ymm19 +vfmadd132ps (%rax), %ymm17, %ymm19 +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd132ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd132ps (%rax), %ymm17, %ymm19 {k1} +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd132ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd132ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + +vfmadd213ps %xmm16, %xmm17, %xmm19 +vfmadd213ps (%rax), %xmm17, %xmm19 +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd213ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd213ps (%rax), %xmm17, %xmm19 {k1} +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd213ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd213ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd213ps %ymm16, %ymm17, %ymm19 +vfmadd213ps (%rax), %ymm17, %ymm19 +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd213ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd213ps (%rax), %ymm17, %ymm19 {k1} +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd213ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd213ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + +vfmadd231ps %xmm16, %xmm17, %xmm19 +vfmadd231ps (%rax), %xmm17, %xmm19 +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +vfmadd231ps %xmm16, %xmm17, %xmm19 {k1} +vfmadd231ps (%rax), %xmm17, %xmm19 {k1} +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {k1} +vfmadd231ps %xmm16, %xmm17, %xmm19 {z}{k1} +vfmadd231ps (%rax), %xmm17, %xmm19 {z}{k1} +vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {z}{k1} + +vfmadd231ps %ymm16, %ymm17, %ymm19 +vfmadd231ps (%rax), %ymm17, %ymm19 +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +vfmadd231ps %ymm16, %ymm17, %ymm19 {k1} +vfmadd231ps (%rax), %ymm17, %ymm19 {k1} +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {k1} +vfmadd231ps %ymm16, %ymm17, %ymm19 {z}{k1} +vfmadd231ps (%rax), %ymm17, %ymm19 {z}{k1} +vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} + vgatherdpd (%rax,%xmm1,2), %ymm2 {k1} vgatherdps (%rax,%ymm1,2), %ymm2 {k1} vgatherqpd (%rax,%ymm1,2), %ymm2 {k1} @@ -1961,6 +2081,114 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 11 3.00 vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 18 3.00 * vdivps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 18 3.00 * vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm16, 
%ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd132ps 
(%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1 11 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 11 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 11 0.50 * vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1 5 0.33 * vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} # CHECK-NEXT: 1 5 0.33 * vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} # CHECK-NEXT: 1 5 0.33 * vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} @@ -3015,7 +3243,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 5.33 5.33 5.33 - - - - - 154.00 924.00 531.50 261.50 442.50 442.50 32.00 300.33 300.33 300.33 289.67 289.67 289.67 16.00 
16.00 +# CHECK-NEXT: 5.33 5.33 5.33 - - - - - 208.00 978.00 531.50 261.50 478.50 478.50 32.00 324.33 324.33 324.33 313.67 313.67 313.67 16.00 16.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -3391,6 +3619,114 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdivps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 
0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - 
- vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# 
CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - - 0.50 
0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} From 80941193765b96cf5d6d95313665cc0a720e1d28 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 30 Oct 2023 18:06:07 +0000 Subject: [PATCH 057/144] [X86] IceLakeServer - ZMM FMA can only use Port0 Fix discrepancy from when this was forked from the SkylakeServer model Confirmed with Agner + uops.info --- llvm/lib/Target/X86/X86SchedIceLake.td | 2 +- .../X86/IceLakeServer/resources-avx512.s | 218 +++++++++--------- 2 files changed, 110 insertions(+), 110 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td index e5be73d22c40b6..a2aa2655bca285 100644 --- a/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/llvm/lib/Target/X86/X86SchedIceLake.td @@ -316,7 +316,7 @@ defm : ICXWriteResPair; defm : ICXWriteResPair; // Fused Multiply Add. defm : ICXWriteResPair; defm : ICXWriteResPair; -defm : ICXWriteResPair; +defm : ICXWriteResPair; defm : ICXWriteResPair; // Floating point double dot product. defm : ICXWriteResPair; defm : ICXWriteResPair; diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s index d99213f0b25a66..6d33fdb3359b4f 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s @@ -1334,60 +1334,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 3 18 10.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 25 10.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 25 10.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 -# CHECK-NEXT: 1 4 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1 4 0.50 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 0.50 vfmadd213pd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 -# CHECK-NEXT: 1 4 0.50 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1 4 0.50 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 0.50 vfmadd231pd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 -# CHECK-NEXT: 1 4 0.50 vfmadd231pd %zmm16, %zmm17, 
%zmm19 {%k1} -# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1 4 0.50 vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 0.50 vfmadd132ps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 -# CHECK-NEXT: 1 4 0.50 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1 4 0.50 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 0.50 vfmadd213ps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 -# CHECK-NEXT: 1 4 0.50 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1 4 0.50 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 4 0.50 vfmadd231ps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 -# CHECK-NEXT: 1 4 0.50 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1 4 0.50 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd132pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 1.00 * vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 1.00 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 1.00 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 1.00 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 1.00 * vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 1.00 * vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd213pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 1.00 * vfmadd213pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 1.00 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 1.00 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 1.00 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 1.00 * vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 1.00 * vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd231pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 1.00 * 
vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 1.00 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 1.00 * vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 1.00 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 1.00 * vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 1.00 * vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd132ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 1.00 * vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 1.00 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 1.00 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 1.00 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 1.00 * vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 1.00 * vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd213ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 1.00 * vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 1.00 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 1.00 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 1.00 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 1.00 * vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 1.00 * vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 1.00 vfmadd231ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: 2 11 1.00 * vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2 11 1.00 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1 4 1.00 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 1.00 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2 11 1.00 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1 4 1.00 vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 1.00 * vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 1.00 * vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 5 21 4.00 * vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: 5 25 8.00 * vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 5 21 4.00 * vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} @@ -2031,7 +2031,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 612.00 344.17 99.67 327.50 327.50 8.00 639.17 2.00 8.00 8.00 8.00 +# CHECK-NEXT: - 612.00 371.17 99.67 327.50 327.50 8.00 612.17 2.00 8.00 8.00 8.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2294,60 +2294,60 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - 10.00 2.00 - - - - 1.00 - - - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - - - vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - - - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132pd (%rax), %zmm17, 
%zmm19 -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213pd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231pd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132ps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213ps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213ps (%rax), 
%zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231ps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd213pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 
0.50 - - - - - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - - - vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 8.00 8.00 - 0.58 0.25 - - - vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - - - vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} From a8799719f704fc80640c59b26f6c9ad2631ee103 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Mon, 30 Oct 2023 15:31:52 +0100 Subject: [PATCH 058/144] [AArch64] Introduce tests for PR67879 (NFC) --- .../Atomics/aarch64-atomic-load-rcpc_immo.ll | 827 ++++++++++++++++++ .../Atomics/aarch64-atomic-store-rcpc_immo.ll | 368 ++++++++ 2 files changed, 1195 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll new file mode 100644 index 00000000000000..05f37a4e440eb0 --- /dev/null +++ 
b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll @@ -0,0 +1,827 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)" +; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=true -global-isel-abort=2 -O0 | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG + +define i8 @load_atomic_i8_aligned_unordered(ptr %ptr) { +; CHECK-LABEL: load_atomic_i8_aligned_unordered: +; CHECK: ldrb w0, [x0, #4] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep unordered, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_aligned_unordered_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i8_aligned_unordered_const: +; CHECK: ldrb w0, [x0, #4] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep unordered, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_aligned_monotonic(ptr %ptr) { +; CHECK-LABEL: load_atomic_i8_aligned_monotonic: +; CHECK: ldrb w0, [x0, #4] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep monotonic, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_aligned_monotonic_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i8_aligned_monotonic_const: +; CHECK: ldrb w0, [x0, #4] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep monotonic, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_aligned_acquire(ptr %ptr) { +; CHECK-LABEL: load_atomic_i8_aligned_acquire: +; CHECK: add x8, x0, #4 +; CHECK: ldaprb w0, [x8] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep acquire, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_aligned_acquire_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i8_aligned_acquire_const: +; CHECK: add x8, x0, #4 +; CHECK: ldaprb w0, [x8] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep acquire, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_aligned_seq_cst(ptr %ptr) { +; CHECK-LABEL: load_atomic_i8_aligned_seq_cst: +; CHECK: add x8, x0, #4 +; CHECK: ldarb w0, [x8] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep seq_cst, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_aligned_seq_cst_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i8_aligned_seq_cst_const: +; CHECK: add x8, x0, #4 +; CHECK: ldarb w0, [x8] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep seq_cst, align 1 + ret i8 %r +} + +define i16 @load_atomic_i16_aligned_unordered(ptr %ptr) { +; CHECK-LABEL: load_atomic_i16_aligned_unordered: +; CHECK: ldrh w0, [x0, #8] + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep unordered, align 2 + ret i16 %r +} + +define i16 @load_atomic_i16_aligned_unordered_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i16_aligned_unordered_const: +; CHECK: ldrh w0, [x0, #8] + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep unordered, align 2 + ret i16 %r +} + +define i16 @load_atomic_i16_aligned_monotonic(ptr %ptr) { +; CHECK-LABEL: load_atomic_i16_aligned_monotonic: +; CHECK: ldrh w0, [x0, #8] + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r 
= load atomic i16, ptr %gep monotonic, align 2 + ret i16 %r +} + +define i16 @load_atomic_i16_aligned_monotonic_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i16_aligned_monotonic_const: +; CHECK: ldrh w0, [x0, #8] + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep monotonic, align 2 + ret i16 %r +} + +define i16 @load_atomic_i16_aligned_acquire(ptr %ptr) { +; CHECK-LABEL: load_atomic_i16_aligned_acquire: +; CHECK: add x8, x0, #8 +; CHECK: ldaprh w0, [x8] + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep acquire, align 2 + ret i16 %r +} + +define i16 @load_atomic_i16_aligned_acquire_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i16_aligned_acquire_const: +; CHECK: add x8, x0, #8 +; CHECK: ldaprh w0, [x8] + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep acquire, align 2 + ret i16 %r +} + +define i16 @load_atomic_i16_aligned_seq_cst(ptr %ptr) { +; CHECK-LABEL: load_atomic_i16_aligned_seq_cst: +; CHECK: add x8, x0, #8 +; CHECK: ldarh w0, [x8] + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep seq_cst, align 2 + ret i16 %r +} + +define i16 @load_atomic_i16_aligned_seq_cst_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i16_aligned_seq_cst_const: +; CHECK: add x8, x0, #8 +; CHECK: ldarh w0, [x8] + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep seq_cst, align 2 + ret i16 %r +} + +define i32 @load_atomic_i32_aligned_unordered(ptr %ptr) { +; CHECK-LABEL: load_atomic_i32_aligned_unordered: +; CHECK: ldr w0, [x0, #16] + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep unordered, align 4 + ret i32 %r +} + +define i32 @load_atomic_i32_aligned_unordered_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i32_aligned_unordered_const: +; CHECK: ldr w0, [x0, #16] + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep unordered, align 4 + ret i32 %r +} + +define i32 @load_atomic_i32_aligned_monotonic(ptr %ptr) { +; CHECK-LABEL: load_atomic_i32_aligned_monotonic: +; CHECK: ldr w0, [x0, #16] + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep monotonic, align 4 + ret i32 %r +} + +define i32 @load_atomic_i32_aligned_monotonic_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i32_aligned_monotonic_const: +; CHECK: ldr w0, [x0, #16] + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep monotonic, align 4 + ret i32 %r +} + +define i32 @load_atomic_i32_aligned_acquire(ptr %ptr) { +; CHECK-LABEL: load_atomic_i32_aligned_acquire: +; CHECK: add x8, x0, #16 +; CHECK: ldapr w0, [x8] + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep acquire, align 4 + ret i32 %r +} + +define i32 @load_atomic_i32_aligned_acquire_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i32_aligned_acquire_const: +; CHECK: add x8, x0, #16 +; CHECK: ldapr w0, [x8] + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep acquire, align 4 + ret i32 %r +} + +define i32 @load_atomic_i32_aligned_seq_cst(ptr %ptr) { +; CHECK-LABEL: load_atomic_i32_aligned_seq_cst: +; CHECK: add x8, x0, #16 +; CHECK: ldar w0, [x8] + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep seq_cst, align 4 + ret i32 %r +} + +define i32 @load_atomic_i32_aligned_seq_cst_const(ptr readonly %ptr) { +; CHECK-LABEL: 
load_atomic_i32_aligned_seq_cst_const: +; CHECK: add x8, x0, #16 +; CHECK: ldar w0, [x8] + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep seq_cst, align 4 + ret i32 %r +} + +define i64 @load_atomic_i64_aligned_unordered(ptr %ptr) { +; CHECK-LABEL: load_atomic_i64_aligned_unordered: +; CHECK: ldr x0, [x0, #32] + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep unordered, align 8 + ret i64 %r +} + +define i64 @load_atomic_i64_aligned_unordered_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i64_aligned_unordered_const: +; CHECK: ldr x0, [x0, #32] + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep unordered, align 8 + ret i64 %r +} + +define i64 @load_atomic_i64_aligned_monotonic(ptr %ptr) { +; CHECK-LABEL: load_atomic_i64_aligned_monotonic: +; CHECK: ldr x0, [x0, #32] + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep monotonic, align 8 + ret i64 %r +} + +define i64 @load_atomic_i64_aligned_monotonic_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i64_aligned_monotonic_const: +; CHECK: ldr x0, [x0, #32] + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep monotonic, align 8 + ret i64 %r +} + +define i64 @load_atomic_i64_aligned_acquire(ptr %ptr) { +; CHECK-LABEL: load_atomic_i64_aligned_acquire: +; CHECK: add x8, x0, #32 +; CHECK: ldapr x0, [x8] + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep acquire, align 8 + ret i64 %r +} + +define i64 @load_atomic_i64_aligned_acquire_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i64_aligned_acquire_const: +; CHECK: add x8, x0, #32 +; CHECK: ldapr x0, [x8] + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep acquire, align 8 + ret i64 %r +} + +define i64 @load_atomic_i64_aligned_seq_cst(ptr %ptr) { +; CHECK-LABEL: load_atomic_i64_aligned_seq_cst: +; CHECK: add x8, x0, #32 +; CHECK: ldar x0, [x8] + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep seq_cst, align 8 + ret i64 %r +} + +define i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i64_aligned_seq_cst_const: +; CHECK: add x8, x0, #32 +; CHECK: ldar x0, [x8] + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep seq_cst, align 8 + ret i64 %r +} + +define i128 @load_atomic_i128_aligned_unordered(ptr %ptr) { +; CHECK-LABEL: load_atomic_i128_aligned_unordered: +; CHECK: ldp x0, x1, [x0, #64] + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep unordered, align 16 + ret i128 %r +} + +define i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i128_aligned_unordered_const: +; CHECK: ldp x0, x1, [x0, #64] + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep unordered, align 16 + ret i128 %r +} + +define i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) { +; CHECK-LABEL: load_atomic_i128_aligned_monotonic: +; CHECK: ldp x0, x1, [x0, #64] + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep monotonic, align 16 + ret i128 %r +} + +define i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const: +; CHECK: ldp x0, x1, [x0, #64] + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep monotonic, 
align 16 + ret i128 %r +} + +define i128 @load_atomic_i128_aligned_acquire(ptr %ptr) { +; CHECK-LABEL: load_atomic_i128_aligned_acquire: +; CHECK: ldp x0, x1, [x0, #64] +; CHECK: dmb ishld + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep acquire, align 16 + ret i128 %r +} + +define i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i128_aligned_acquire_const: +; CHECK: ldp x0, x1, [x0, #64] +; CHECK: dmb ishld + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep acquire, align 16 + ret i128 %r +} + +define i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) { +; CHECK-LABEL: load_atomic_i128_aligned_seq_cst: +; CHECK: ldp x0, x1, [x0, #64] +; CHECK: dmb ish + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep seq_cst, align 16 + ret i128 %r +} + +define i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const: +; CHECK: ldp x0, x1, [x0, #64] +; CHECK: dmb ish + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep seq_cst, align 16 + ret i128 %r +} + +define i8 @load_atomic_i8_unaligned_unordered(ptr %ptr) { +; CHECK-LABEL: load_atomic_i8_unaligned_unordered: +; CHECK: ldrb w0, [x0, #4] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep unordered, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_unaligned_unordered_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i8_unaligned_unordered_const: +; CHECK: ldrb w0, [x0, #4] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep unordered, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_unaligned_monotonic(ptr %ptr) { +; CHECK-LABEL: load_atomic_i8_unaligned_monotonic: +; CHECK: ldrb w0, [x0, #4] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep monotonic, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_unaligned_monotonic_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i8_unaligned_monotonic_const: +; CHECK: ldrb w0, [x0, #4] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep monotonic, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_unaligned_acquire(ptr %ptr) { +; CHECK-LABEL: load_atomic_i8_unaligned_acquire: +; CHECK: add x8, x0, #4 +; CHECK: ldaprb w0, [x8] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep acquire, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_unaligned_acquire_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i8_unaligned_acquire_const: +; CHECK: add x8, x0, #4 +; CHECK: ldaprb w0, [x8] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep acquire, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_unaligned_seq_cst(ptr %ptr) { +; CHECK-LABEL: load_atomic_i8_unaligned_seq_cst: +; CHECK: add x8, x0, #4 +; CHECK: ldarb w0, [x8] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep seq_cst, align 1 + ret i8 %r +} + +define i8 @load_atomic_i8_unaligned_seq_cst_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_i8_unaligned_seq_cst_const: +; CHECK: add x8, x0, #4 +; CHECK: ldarb w0, [x8] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i8, ptr %gep seq_cst, align 1 + ret i8 %r +} + +define i16 @load_atomic_i16_unaligned_unordered(ptr %ptr) { +; GISEL-LABEL: load_atomic_i16_unaligned_unordered: +; 
GISEL: add x1, x8, #4 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i16_unaligned_unordered: +; SDAG: add x1, x0, #4 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep unordered, align 1 + ret i16 %r +} + +define i16 @load_atomic_i16_unaligned_unordered_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i16_unaligned_unordered_const: +; GISEL: add x1, x8, #4 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i16_unaligned_unordered_const: +; SDAG: add x1, x0, #4 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep unordered, align 1 + ret i16 %r +} + +define i16 @load_atomic_i16_unaligned_monotonic(ptr %ptr) { +; GISEL-LABEL: load_atomic_i16_unaligned_monotonic: +; GISEL: add x1, x8, #8 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i16_unaligned_monotonic: +; SDAG: add x1, x0, #8 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep monotonic, align 1 + ret i16 %r +} + +define i16 @load_atomic_i16_unaligned_monotonic_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i16_unaligned_monotonic_const: +; GISEL: add x1, x8, #8 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i16_unaligned_monotonic_const: +; SDAG: add x1, x0, #8 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep monotonic, align 1 + ret i16 %r +} + +define i16 @load_atomic_i16_unaligned_acquire(ptr %ptr) { +; GISEL-LABEL: load_atomic_i16_unaligned_acquire: +; GISEL: add x1, x8, #8 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i16_unaligned_acquire: +; SDAG: add x1, x0, #8 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep acquire, align 1 + ret i16 %r +} + +define i16 @load_atomic_i16_unaligned_acquire_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i16_unaligned_acquire_const: +; GISEL: add x1, x8, #8 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i16_unaligned_acquire_const: +; SDAG: add x1, x0, #8 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep acquire, align 1 + ret i16 %r +} + +define i16 @load_atomic_i16_unaligned_seq_cst(ptr %ptr) { +; GISEL-LABEL: load_atomic_i16_unaligned_seq_cst: +; GISEL: add x1, x8, #8 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i16_unaligned_seq_cst: +; SDAG: add x1, x0, #8 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep seq_cst, align 1 + ret i16 %r +} + +define i16 @load_atomic_i16_unaligned_seq_cst_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i16_unaligned_seq_cst_const: +; GISEL: add x1, x8, #8 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i16_unaligned_seq_cst_const: +; SDAG: add x1, x0, #8 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + %r = load atomic i16, ptr %gep seq_cst, align 1 + ret i16 %r +} + +define i32 @load_atomic_i32_unaligned_unordered(ptr %ptr) { +; GISEL-LABEL: load_atomic_i32_unaligned_unordered: +; GISEL: add x1, x8, #16 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i32_unaligned_unordered: +; SDAG: add x1, x0, #16 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep unordered, align 1 + ret i32 %r +} + +define i32 
@load_atomic_i32_unaligned_unordered_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i32_unaligned_unordered_const: +; GISEL: add x1, x8, #16 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i32_unaligned_unordered_const: +; SDAG: add x1, x0, #16 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep unordered, align 1 + ret i32 %r +} + +define i32 @load_atomic_i32_unaligned_monotonic(ptr %ptr) { +; GISEL-LABEL: load_atomic_i32_unaligned_monotonic: +; GISEL: add x1, x8, #16 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i32_unaligned_monotonic: +; SDAG: add x1, x0, #16 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep monotonic, align 1 + ret i32 %r +} + +define i32 @load_atomic_i32_unaligned_monotonic_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i32_unaligned_monotonic_const: +; GISEL: add x1, x8, #16 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i32_unaligned_monotonic_const: +; SDAG: add x1, x0, #16 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep monotonic, align 1 + ret i32 %r +} + +define i32 @load_atomic_i32_unaligned_acquire(ptr %ptr) { +; GISEL-LABEL: load_atomic_i32_unaligned_acquire: +; GISEL: add x1, x8, #16 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i32_unaligned_acquire: +; SDAG: add x1, x0, #16 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep acquire, align 1 + ret i32 %r +} + +define i32 @load_atomic_i32_unaligned_acquire_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i32_unaligned_acquire_const: +; GISEL: add x1, x8, #16 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i32_unaligned_acquire_const: +; SDAG: add x1, x0, #16 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep acquire, align 1 + ret i32 %r +} + +define i32 @load_atomic_i32_unaligned_seq_cst(ptr %ptr) { +; GISEL-LABEL: load_atomic_i32_unaligned_seq_cst: +; GISEL: add x1, x8, #16 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i32_unaligned_seq_cst: +; SDAG: add x1, x0, #16 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep seq_cst, align 1 + ret i32 %r +} + +define i32 @load_atomic_i32_unaligned_seq_cst_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i32_unaligned_seq_cst_const: +; GISEL: add x1, x8, #16 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i32_unaligned_seq_cst_const: +; SDAG: add x1, x0, #16 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + %r = load atomic i32, ptr %gep seq_cst, align 1 + ret i32 %r +} + +define i64 @load_atomic_i64_unaligned_unordered(ptr %ptr) { +; GISEL-LABEL: load_atomic_i64_unaligned_unordered: +; GISEL: add x1, x8, #32 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i64_unaligned_unordered: +; SDAG: add x1, x0, #32 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep unordered, align 1 + ret i64 %r +} + +define i64 @load_atomic_i64_unaligned_unordered_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i64_unaligned_unordered_const: +; GISEL: add x1, x8, #32 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i64_unaligned_unordered_const: +; SDAG: add x1, x0, #32 +; SDAG: bl __atomic_load + %gep = getelementptr 
inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep unordered, align 1 + ret i64 %r +} + +define i64 @load_atomic_i64_unaligned_monotonic(ptr %ptr) { +; GISEL-LABEL: load_atomic_i64_unaligned_monotonic: +; GISEL: add x1, x8, #32 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i64_unaligned_monotonic: +; SDAG: add x1, x0, #32 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep monotonic, align 1 + ret i64 %r +} + +define i64 @load_atomic_i64_unaligned_monotonic_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i64_unaligned_monotonic_const: +; GISEL: add x1, x8, #32 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i64_unaligned_monotonic_const: +; SDAG: add x1, x0, #32 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep monotonic, align 1 + ret i64 %r +} + +define i64 @load_atomic_i64_unaligned_acquire(ptr %ptr) { +; GISEL-LABEL: load_atomic_i64_unaligned_acquire: +; GISEL: add x1, x8, #32 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i64_unaligned_acquire: +; SDAG: add x1, x0, #32 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep acquire, align 1 + ret i64 %r +} + +define i64 @load_atomic_i64_unaligned_acquire_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i64_unaligned_acquire_const: +; GISEL: add x1, x8, #32 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i64_unaligned_acquire_const: +; SDAG: add x1, x0, #32 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep acquire, align 1 + ret i64 %r +} + +define i64 @load_atomic_i64_unaligned_seq_cst(ptr %ptr) { +; GISEL-LABEL: load_atomic_i64_unaligned_seq_cst: +; GISEL: add x1, x8, #32 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i64_unaligned_seq_cst: +; SDAG: add x1, x0, #32 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep seq_cst, align 1 + ret i64 %r +} + +define i64 @load_atomic_i64_unaligned_seq_cst_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i64_unaligned_seq_cst_const: +; GISEL: add x1, x8, #32 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i64_unaligned_seq_cst_const: +; SDAG: add x1, x0, #32 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + %r = load atomic i64, ptr %gep seq_cst, align 1 + ret i64 %r +} + +define i128 @load_atomic_i128_unaligned_unordered(ptr %ptr) { +; GISEL-LABEL: load_atomic_i128_unaligned_unordered: +; GISEL: add x1, x8, #64 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i128_unaligned_unordered: +; SDAG: add x1, x0, #64 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep unordered, align 1 + ret i128 %r +} + +define i128 @load_atomic_i128_unaligned_unordered_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i128_unaligned_unordered_const: +; GISEL: add x1, x8, #64 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i128_unaligned_unordered_const: +; SDAG: add x1, x0, #64 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep unordered, align 1 + ret i128 %r +} + +define i128 @load_atomic_i128_unaligned_monotonic(ptr %ptr) { +; GISEL-LABEL: load_atomic_i128_unaligned_monotonic: +; GISEL: add x1, x8, #64 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: 
load_atomic_i128_unaligned_monotonic: +; SDAG: add x1, x0, #64 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep monotonic, align 1 + ret i128 %r +} + +define i128 @load_atomic_i128_unaligned_monotonic_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i128_unaligned_monotonic_const: +; GISEL: add x1, x8, #64 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i128_unaligned_monotonic_const: +; SDAG: add x1, x0, #64 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep monotonic, align 1 + ret i128 %r +} + +define i128 @load_atomic_i128_unaligned_acquire(ptr %ptr) { +; GISEL-LABEL: load_atomic_i128_unaligned_acquire: +; GISEL: add x1, x8, #64 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i128_unaligned_acquire: +; SDAG: add x1, x0, #64 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep acquire, align 1 + ret i128 %r +} + +define i128 @load_atomic_i128_unaligned_acquire_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i128_unaligned_acquire_const: +; GISEL: add x1, x8, #64 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i128_unaligned_acquire_const: +; SDAG: add x1, x0, #64 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep acquire, align 1 + ret i128 %r +} + +define i128 @load_atomic_i128_unaligned_seq_cst(ptr %ptr) { +; GISEL-LABEL: load_atomic_i128_unaligned_seq_cst: +; GISEL: add x1, x8, #64 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i128_unaligned_seq_cst: +; SDAG: add x1, x0, #64 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep seq_cst, align 1 + ret i128 %r +} + +define i128 @load_atomic_i128_unaligned_seq_cst_const(ptr readonly %ptr) { +; GISEL-LABEL: load_atomic_i128_unaligned_seq_cst_const: +; GISEL: add x1, x8, #64 +; GISEL: bl __atomic_load +; +; SDAG-LABEL: load_atomic_i128_unaligned_seq_cst_const: +; SDAG: add x1, x0, #64 +; SDAG: bl __atomic_load + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + %r = load atomic i128, ptr %gep seq_cst, align 1 + ret i128 %r +} diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll new file mode 100644 index 00000000000000..86cb738c5799dd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll @@ -0,0 +1,368 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)" +; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=true -global-isel-abort=2 -O0 | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG + +define void @store_atomic_i8_aligned_unordered(i8 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i8_aligned_unordered: +; CHECK: strb w0, [x1, #4] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + store atomic i8 %value, ptr %gep unordered, align 1 + ret void +} + +define void @store_atomic_i8_aligned_monotonic(i8 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i8_aligned_monotonic: +; CHECK: strb w0, [x1, #4] + 
%gep = getelementptr inbounds i8, ptr %ptr, i32 4 + store atomic i8 %value, ptr %gep monotonic, align 1 + ret void +} + +define void @store_atomic_i8_aligned_release(i8 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i8_aligned_release: +; CHECK: add x8, x1, #4 +; CHECK: stlrb w0, [x8] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + store atomic i8 %value, ptr %gep release, align 1 + ret void +} + +define void @store_atomic_i8_aligned_seq_cst(i8 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i8_aligned_seq_cst: +; CHECK: add x8, x1, #4 +; CHECK: stlrb w0, [x8] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + store atomic i8 %value, ptr %gep seq_cst, align 1 + ret void +} + +define void @store_atomic_i16_aligned_unordered(i16 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i16_aligned_unordered: +; CHECK: strh w0, [x1, #8] + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + store atomic i16 %value, ptr %gep unordered, align 2 + ret void +} + +define void @store_atomic_i16_aligned_monotonic(i16 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i16_aligned_monotonic: +; CHECK: strh w0, [x1, #8] + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + store atomic i16 %value, ptr %gep monotonic, align 2 + ret void +} + +define void @store_atomic_i16_aligned_release(i16 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i16_aligned_release: +; CHECK: add x8, x1, #8 +; CHECK: stlrh w0, [x8] + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + store atomic i16 %value, ptr %gep release, align 2 + ret void +} + +define void @store_atomic_i16_aligned_seq_cst(i16 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i16_aligned_seq_cst: +; CHECK: add x8, x1, #8 +; CHECK: stlrh w0, [x8] + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + store atomic i16 %value, ptr %gep seq_cst, align 2 + ret void +} + +define void @store_atomic_i32_aligned_unordered(i32 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i32_aligned_unordered: +; CHECK: str w0, [x1, #16] + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + store atomic i32 %value, ptr %gep unordered, align 4 + ret void +} + +define void @store_atomic_i32_aligned_monotonic(i32 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i32_aligned_monotonic: +; CHECK: str w0, [x1, #16] + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + store atomic i32 %value, ptr %gep monotonic, align 4 + ret void +} + +define void @store_atomic_i32_aligned_release(i32 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i32_aligned_release: +; CHECK: add x8, x1, #16 +; CHECK: stlr w0, [x8] + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + store atomic i32 %value, ptr %gep release, align 4 + ret void +} + +define void @store_atomic_i32_aligned_seq_cst(i32 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i32_aligned_seq_cst: +; CHECK: add x8, x1, #16 +; CHECK: stlr w0, [x8] + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + store atomic i32 %value, ptr %gep seq_cst, align 4 + ret void +} + +define void @store_atomic_i64_aligned_unordered(i64 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i64_aligned_unordered: +; CHECK: str x0, [x1, #32] + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + store atomic i64 %value, ptr %gep unordered, align 8 + ret void +} + +define void @store_atomic_i64_aligned_monotonic(i64 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i64_aligned_monotonic: +; CHECK: str x0, [x1, #32] + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + store atomic i64 %value, ptr %gep monotonic, align 8 + ret void +} + +define void 
@store_atomic_i64_aligned_release(i64 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i64_aligned_release: +; CHECK: add x8, x1, #32 +; CHECK: stlr x0, [x8] + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + store atomic i64 %value, ptr %gep release, align 8 + ret void +} + +define void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i64_aligned_seq_cst: +; CHECK: add x8, x1, #32 +; CHECK: stlr x0, [x8] + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + store atomic i64 %value, ptr %gep seq_cst, align 8 + ret void +} + +define void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i128_aligned_unordered: +; CHECK: stp x0, x1, [x2, #64] + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + store atomic i128 %value, ptr %gep unordered, align 16 + ret void +} + +define void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i128_aligned_monotonic: +; CHECK: stp x0, x1, [x2, #64] + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + store atomic i128 %value, ptr %gep monotonic, align 16 + ret void +} + +define void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i128_aligned_release: +; CHECK: dmb ish +; CHECK: stp x0, x1, [x2, #64] + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + store atomic i128 %value, ptr %gep release, align 16 + ret void +} + +define void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i128_aligned_seq_cst: +; CHECK: dmb ish +; CHECK: stp x0, x1, [x2, #64] +; CHECK: dmb ish + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + store atomic i128 %value, ptr %gep seq_cst, align 16 + ret void +} + +define void @store_atomic_i8_unaligned_unordered(i8 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i8_unaligned_unordered: +; CHECK: strb w0, [x1, #4] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + store atomic i8 %value, ptr %gep unordered, align 1 + ret void +} + +define void @store_atomic_i8_unaligned_monotonic(i8 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i8_unaligned_monotonic: +; CHECK: strb w0, [x1, #4] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + store atomic i8 %value, ptr %gep monotonic, align 1 + ret void +} + +define void @store_atomic_i8_unaligned_release(i8 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i8_unaligned_release: +; CHECK: add x8, x1, #4 +; CHECK: stlrb w0, [x8] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + store atomic i8 %value, ptr %gep release, align 1 + ret void +} + +define void @store_atomic_i8_unaligned_seq_cst(i8 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i8_unaligned_seq_cst: +; CHECK: add x8, x1, #4 +; CHECK: stlrb w0, [x8] + %gep = getelementptr inbounds i8, ptr %ptr, i32 4 + store atomic i8 %value, ptr %gep seq_cst, align 1 + ret void +} + +define void @store_atomic_i16_unaligned_unordered(i16 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i16_unaligned_unordered: +; CHECK: add x1, x1, #8 +; CHECK: bl __atomic_store + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + store atomic i16 %value, ptr %gep unordered, align 1 + ret void +} + +define void @store_atomic_i16_unaligned_monotonic(i16 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i16_unaligned_monotonic: +; CHECK: add x1, x1, #8 +; CHECK: bl __atomic_store + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + store atomic i16 %value, ptr %gep monotonic, align 1 + ret void +} + +define void 
@store_atomic_i16_unaligned_release(i16 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i16_unaligned_release: +; CHECK: add x1, x1, #8 +; CHECK: bl __atomic_store + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + store atomic i16 %value, ptr %gep release, align 1 + ret void +} + +define void @store_atomic_i16_unaligned_seq_cst(i16 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i16_unaligned_seq_cst: +; CHECK: add x1, x1, #8 +; CHECK: bl __atomic_store + %gep = getelementptr inbounds i16, ptr %ptr, i32 4 + store atomic i16 %value, ptr %gep seq_cst, align 1 + ret void +} + +define void @store_atomic_i32_unaligned_unordered(i32 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i32_unaligned_unordered: +; CHECK: add x1, x1, #16 +; CHECK: bl __atomic_store + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + store atomic i32 %value, ptr %gep unordered, align 1 + ret void +} + +define void @store_atomic_i32_unaligned_monotonic(i32 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i32_unaligned_monotonic: +; CHECK: add x1, x1, #16 +; CHECK: bl __atomic_store + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + store atomic i32 %value, ptr %gep monotonic, align 1 + ret void +} + +define void @store_atomic_i32_unaligned_release(i32 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i32_unaligned_release: +; CHECK: add x1, x1, #16 +; CHECK: bl __atomic_store + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + store atomic i32 %value, ptr %gep release, align 1 + ret void +} + +define void @store_atomic_i32_unaligned_seq_cst(i32 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i32_unaligned_seq_cst: +; CHECK: add x1, x1, #16 +; CHECK: bl __atomic_store + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 + store atomic i32 %value, ptr %gep seq_cst, align 1 + ret void +} + +define void @store_atomic_i64_unaligned_unordered(i64 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i64_unaligned_unordered: +; CHECK: add x1, x1, #32 +; CHECK: bl __atomic_store + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + store atomic i64 %value, ptr %gep unordered, align 1 + ret void +} + +define void @store_atomic_i64_unaligned_monotonic(i64 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i64_unaligned_monotonic: +; CHECK: add x1, x1, #32 +; CHECK: bl __atomic_store + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + store atomic i64 %value, ptr %gep monotonic, align 1 + ret void +} + +define void @store_atomic_i64_unaligned_release(i64 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i64_unaligned_release: +; CHECK: add x1, x1, #32 +; CHECK: bl __atomic_store + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + store atomic i64 %value, ptr %gep release, align 1 + ret void +} + +define void @store_atomic_i64_unaligned_seq_cst(i64 %value, ptr %ptr) { +; CHECK-LABEL: store_atomic_i64_unaligned_seq_cst: +; CHECK: add x1, x1, #32 +; CHECK: bl __atomic_store + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 + store atomic i64 %value, ptr %gep seq_cst, align 1 + ret void +} + +define void @store_atomic_i128_unaligned_unordered(i128 %value, ptr %ptr) { +; GISEL-LABEL: store_atomic_i128_unaligned_unordered: +; GISEL: add x1, x8, #64 +; GISEL: bl __atomic_store +; +; SDAG-LABEL: store_atomic_i128_unaligned_unordered: +; SDAG: add x1, x2, #64 +; SDAG: bl __atomic_store + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + store atomic i128 %value, ptr %gep unordered, align 1 + ret void +} + +define void @store_atomic_i128_unaligned_monotonic(i128 %value, ptr %ptr) { +; GISEL-LABEL: 
store_atomic_i128_unaligned_monotonic: +; GISEL: add x1, x8, #64 +; GISEL: bl __atomic_store +; +; SDAG-LABEL: store_atomic_i128_unaligned_monotonic: +; SDAG: add x1, x2, #64 +; SDAG: bl __atomic_store + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + store atomic i128 %value, ptr %gep monotonic, align 1 + ret void +} + +define void @store_atomic_i128_unaligned_release(i128 %value, ptr %ptr) { +; GISEL-LABEL: store_atomic_i128_unaligned_release: +; GISEL: add x1, x8, #64 +; GISEL: bl __atomic_store +; +; SDAG-LABEL: store_atomic_i128_unaligned_release: +; SDAG: add x1, x2, #64 +; SDAG: bl __atomic_store + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + store atomic i128 %value, ptr %gep release, align 1 + ret void +} + +define void @store_atomic_i128_unaligned_seq_cst(i128 %value, ptr %ptr) { +; GISEL-LABEL: store_atomic_i128_unaligned_seq_cst: +; GISEL: add x1, x8, #64 +; GISEL: bl __atomic_store +; +; SDAG-LABEL: store_atomic_i128_unaligned_seq_cst: +; SDAG: add x1, x2, #64 +; SDAG: bl __atomic_store + %gep = getelementptr inbounds i128, ptr %ptr, i32 4 + store atomic i128 %value, ptr %gep seq_cst, align 1 + ret void +} From 9fe5700611de180c2b5cfc0422eaebe1d027a826 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Mon, 30 Oct 2023 15:47:39 +0100 Subject: [PATCH 059/144] [AArch64] Add support for v8.4a `ldapur`/`stlur` AArch64 backend now features v8.4a atomic Load-Acquire RCpc and Store-Release register unscaled support. --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 18 ++++++----- .../lib/Target/AArch64/AArch64InstrAtomics.td | 31 +++++++++++++++++++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +- .../GISel/AArch64InstructionSelector.cpp | 3 -- .../Atomics/aarch64-atomic-load-rcpc_immo.ll | 30 ++++++------------ .../Atomics/aarch64-atomic-store-rcpc_immo.ll | 30 ++++++------------ 6 files changed, 63 insertions(+), 51 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 38759a2474518f..7617dccdeee397 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -997,6 +997,15 @@ static bool isWorthFoldingADDlow(SDValue N) { return true; } +/// Check if the immediate offset is valid as a scaled immediate. +static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, + unsigned Size) { + if ((Offset & (Size - 1)) == 0 && Offset >= 0 && + Offset < (Range << Log2_32(Size))) + return true; + return false; +} + /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit /// immediate" address. The "Size" argument is the size in bytes of the memory /// reference, which determines the scale. @@ -1092,7 +1101,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { int64_t RHSC = (int64_t)RHS->getZExtValue(); unsigned Scale = Log2_32(Size); - if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { + if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); @@ -1130,10 +1139,6 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, return false; if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { int64_t RHSC = RHS->getSExtValue(); - // If the offset is valid as a scaled immediate, don't match here. 
- if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && - RHSC < (0x1000 << Log2_32(Size))) - return false; if (RHSC >= -256 && RHSC < 256) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { @@ -1312,11 +1317,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, // LDR X2, [BaseReg, X0] if (isa(RHS)) { int64_t ImmOff = (int64_t)cast(RHS)->getZExtValue(); - unsigned Scale = Log2_32(Size); // Skip the immediate can be selected by load/store addressing mode. // Also skip the immediate can be encoded by a single ADD (SUB is also // checked by using -ImmOff). - if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || + if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) || isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) return false; diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td index fa5a8515ed92ec..0002db52b1995c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td @@ -573,3 +573,34 @@ let Predicates = [HasRCPC3, HasNEON] in { (i64 (bitconvert (v1f64 VecListOne64:$Vt)))), (STL1 (SUBREG_TO_REG (i64 0), VecListOne64:$Vt, dsub), (i64 0), GPR64sp:$Rn)>; } + +// v8.4a FEAT_LRCPC2 patterns +let Predicates = [HasRCPC_IMMO] in { + // Load-Acquire RCpc Register unscaled loads + def : Pat<(acquiring_load + (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), + (LDAPURBi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(acquiring_load + (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), + (LDAPURHi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(acquiring_load + (am_unscaled32 GPR64sp:$Rn, simm9:$offset)), + (LDAPURi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(acquiring_load + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (LDAPURXi GPR64sp:$Rn, simm9:$offset)>; + + // Store-Release Register unscaled stores + def : Pat<(releasing_store + (am_unscaled8 GPR64sp:$Rn, simm9:$offset), GPR32:$val), + (STLURBi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(releasing_store + (am_unscaled16 GPR64sp:$Rn, simm9:$offset), GPR32:$val), + (STLURHi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(releasing_store + (am_unscaled32 GPR64sp:$Rn, simm9:$offset), GPR32:$val), + (STLURWi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(releasing_store + (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val), + (STLURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>; +} diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index ee42612c0fcdd2..069a283dd311e5 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -94,7 +94,7 @@ def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">, def HasFlagM : Predicate<"Subtarget->hasFlagM()">, AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">; -def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">, +def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">, AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">; def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 2089bfba5ff37c..88516967515a58 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -7397,9 +7397,6 @@ AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, return std::nullopt; RHSC = 
RHSOp1.getCImm()->getSExtValue(); - // If the offset is valid as a scaled immediate, don't match here. - if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) - return std::nullopt; if (RHSC >= -256 && RHSC < 256) { MachineOperand &Base = RootDef->getOperand(1); return {{ diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll index 05f37a4e440eb0..cea15419e67c85 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll @@ -36,8 +36,7 @@ define i8 @load_atomic_i8_aligned_monotonic_const(ptr readonly %ptr) { define i8 @load_atomic_i8_aligned_acquire(ptr %ptr) { ; CHECK-LABEL: load_atomic_i8_aligned_acquire: -; CHECK: add x8, x0, #4 -; CHECK: ldaprb w0, [x8] +; CHECK: ldapurb w0, [x0, #4] %gep = getelementptr inbounds i8, ptr %ptr, i32 4 %r = load atomic i8, ptr %gep acquire, align 1 ret i8 %r @@ -45,8 +44,7 @@ define i8 @load_atomic_i8_aligned_acquire(ptr %ptr) { define i8 @load_atomic_i8_aligned_acquire_const(ptr readonly %ptr) { ; CHECK-LABEL: load_atomic_i8_aligned_acquire_const: -; CHECK: add x8, x0, #4 -; CHECK: ldaprb w0, [x8] +; CHECK: ldapurb w0, [x0, #4] %gep = getelementptr inbounds i8, ptr %ptr, i32 4 %r = load atomic i8, ptr %gep acquire, align 1 ret i8 %r @@ -104,8 +102,7 @@ define i16 @load_atomic_i16_aligned_monotonic_const(ptr readonly %ptr) { define i16 @load_atomic_i16_aligned_acquire(ptr %ptr) { ; CHECK-LABEL: load_atomic_i16_aligned_acquire: -; CHECK: add x8, x0, #8 -; CHECK: ldaprh w0, [x8] +; CHECK: ldapurh w0, [x0, #8] %gep = getelementptr inbounds i16, ptr %ptr, i32 4 %r = load atomic i16, ptr %gep acquire, align 2 ret i16 %r @@ -113,8 +110,7 @@ define i16 @load_atomic_i16_aligned_acquire(ptr %ptr) { define i16 @load_atomic_i16_aligned_acquire_const(ptr readonly %ptr) { ; CHECK-LABEL: load_atomic_i16_aligned_acquire_const: -; CHECK: add x8, x0, #8 -; CHECK: ldaprh w0, [x8] +; CHECK: ldapurh w0, [x0, #8] %gep = getelementptr inbounds i16, ptr %ptr, i32 4 %r = load atomic i16, ptr %gep acquire, align 2 ret i16 %r @@ -172,8 +168,7 @@ define i32 @load_atomic_i32_aligned_monotonic_const(ptr readonly %ptr) { define i32 @load_atomic_i32_aligned_acquire(ptr %ptr) { ; CHECK-LABEL: load_atomic_i32_aligned_acquire: -; CHECK: add x8, x0, #16 -; CHECK: ldapr w0, [x8] +; CHECK: ldapur w0, [x0, #16] %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %r = load atomic i32, ptr %gep acquire, align 4 ret i32 %r @@ -181,8 +176,7 @@ define i32 @load_atomic_i32_aligned_acquire(ptr %ptr) { define i32 @load_atomic_i32_aligned_acquire_const(ptr readonly %ptr) { ; CHECK-LABEL: load_atomic_i32_aligned_acquire_const: -; CHECK: add x8, x0, #16 -; CHECK: ldapr w0, [x8] +; CHECK: ldapur w0, [x0, #16] %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %r = load atomic i32, ptr %gep acquire, align 4 ret i32 %r @@ -240,8 +234,7 @@ define i64 @load_atomic_i64_aligned_monotonic_const(ptr readonly %ptr) { define i64 @load_atomic_i64_aligned_acquire(ptr %ptr) { ; CHECK-LABEL: load_atomic_i64_aligned_acquire: -; CHECK: add x8, x0, #32 -; CHECK: ldapr x0, [x8] +; CHECK: ldapur x0, [x0, #32] %gep = getelementptr inbounds i64, ptr %ptr, i32 4 %r = load atomic i64, ptr %gep acquire, align 8 ret i64 %r @@ -249,8 +242,7 @@ define i64 @load_atomic_i64_aligned_acquire(ptr %ptr) { define i64 @load_atomic_i64_aligned_acquire_const(ptr readonly %ptr) { ; CHECK-LABEL: load_atomic_i64_aligned_acquire_const: 
-; CHECK: add x8, x0, #32 -; CHECK: ldapr x0, [x8] +; CHECK: ldapur x0, [x0, #32] %gep = getelementptr inbounds i64, ptr %ptr, i32 4 %r = load atomic i64, ptr %gep acquire, align 8 ret i64 %r @@ -376,8 +368,7 @@ define i8 @load_atomic_i8_unaligned_monotonic_const(ptr readonly %ptr) { define i8 @load_atomic_i8_unaligned_acquire(ptr %ptr) { ; CHECK-LABEL: load_atomic_i8_unaligned_acquire: -; CHECK: add x8, x0, #4 -; CHECK: ldaprb w0, [x8] +; CHECK: ldapurb w0, [x0, #4] %gep = getelementptr inbounds i8, ptr %ptr, i32 4 %r = load atomic i8, ptr %gep acquire, align 1 ret i8 %r @@ -385,8 +376,7 @@ define i8 @load_atomic_i8_unaligned_acquire(ptr %ptr) { define i8 @load_atomic_i8_unaligned_acquire_const(ptr readonly %ptr) { ; CHECK-LABEL: load_atomic_i8_unaligned_acquire_const: -; CHECK: add x8, x0, #4 -; CHECK: ldaprb w0, [x8] +; CHECK: ldapurb w0, [x0, #4] %gep = getelementptr inbounds i8, ptr %ptr, i32 4 %r = load atomic i8, ptr %gep acquire, align 1 ret i8 %r diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll index 86cb738c5799dd..4f461571c55824 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll @@ -20,8 +20,7 @@ define void @store_atomic_i8_aligned_monotonic(i8 %value, ptr %ptr) { define void @store_atomic_i8_aligned_release(i8 %value, ptr %ptr) { ; CHECK-LABEL: store_atomic_i8_aligned_release: -; CHECK: add x8, x1, #4 -; CHECK: stlrb w0, [x8] +; CHECK: stlurb w0, [x1, #4] %gep = getelementptr inbounds i8, ptr %ptr, i32 4 store atomic i8 %value, ptr %gep release, align 1 ret void @@ -29,8 +28,7 @@ define void @store_atomic_i8_aligned_release(i8 %value, ptr %ptr) { define void @store_atomic_i8_aligned_seq_cst(i8 %value, ptr %ptr) { ; CHECK-LABEL: store_atomic_i8_aligned_seq_cst: -; CHECK: add x8, x1, #4 -; CHECK: stlrb w0, [x8] +; CHECK: stlurb w0, [x1, #4] %gep = getelementptr inbounds i8, ptr %ptr, i32 4 store atomic i8 %value, ptr %gep seq_cst, align 1 ret void @@ -54,8 +52,7 @@ define void @store_atomic_i16_aligned_monotonic(i16 %value, ptr %ptr) { define void @store_atomic_i16_aligned_release(i16 %value, ptr %ptr) { ; CHECK-LABEL: store_atomic_i16_aligned_release: -; CHECK: add x8, x1, #8 -; CHECK: stlrh w0, [x8] +; CHECK: stlurh w0, [x1, #8] %gep = getelementptr inbounds i16, ptr %ptr, i32 4 store atomic i16 %value, ptr %gep release, align 2 ret void @@ -63,8 +60,7 @@ define void @store_atomic_i16_aligned_release(i16 %value, ptr %ptr) { define void @store_atomic_i16_aligned_seq_cst(i16 %value, ptr %ptr) { ; CHECK-LABEL: store_atomic_i16_aligned_seq_cst: -; CHECK: add x8, x1, #8 -; CHECK: stlrh w0, [x8] +; CHECK: stlurh w0, [x1, #8] %gep = getelementptr inbounds i16, ptr %ptr, i32 4 store atomic i16 %value, ptr %gep seq_cst, align 2 ret void @@ -88,8 +84,7 @@ define void @store_atomic_i32_aligned_monotonic(i32 %value, ptr %ptr) { define void @store_atomic_i32_aligned_release(i32 %value, ptr %ptr) { ; CHECK-LABEL: store_atomic_i32_aligned_release: -; CHECK: add x8, x1, #16 -; CHECK: stlr w0, [x8] +; CHECK: stlur w0, [x1, #16] %gep = getelementptr inbounds i32, ptr %ptr, i32 4 store atomic i32 %value, ptr %gep release, align 4 ret void @@ -97,8 +92,7 @@ define void @store_atomic_i32_aligned_release(i32 %value, ptr %ptr) { define void @store_atomic_i32_aligned_seq_cst(i32 %value, ptr %ptr) { ; CHECK-LABEL: store_atomic_i32_aligned_seq_cst: -; CHECK: add x8, x1, #16 -; CHECK: stlr w0, [x8] +; 
CHECK: stlur w0, [x1, #16] %gep = getelementptr inbounds i32, ptr %ptr, i32 4 store atomic i32 %value, ptr %gep seq_cst, align 4 ret void @@ -122,8 +116,7 @@ define void @store_atomic_i64_aligned_monotonic(i64 %value, ptr %ptr) { define void @store_atomic_i64_aligned_release(i64 %value, ptr %ptr) { ; CHECK-LABEL: store_atomic_i64_aligned_release: -; CHECK: add x8, x1, #32 -; CHECK: stlr x0, [x8] +; CHECK: stlur x0, [x1, #32] %gep = getelementptr inbounds i64, ptr %ptr, i32 4 store atomic i64 %value, ptr %gep release, align 8 ret void @@ -131,8 +124,7 @@ define void @store_atomic_i64_aligned_release(i64 %value, ptr %ptr) { define void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) { ; CHECK-LABEL: store_atomic_i64_aligned_seq_cst: -; CHECK: add x8, x1, #32 -; CHECK: stlr x0, [x8] +; CHECK: stlur x0, [x1, #32] %gep = getelementptr inbounds i64, ptr %ptr, i32 4 store atomic i64 %value, ptr %gep seq_cst, align 8 ret void @@ -191,8 +183,7 @@ define void @store_atomic_i8_unaligned_monotonic(i8 %value, ptr %ptr) { define void @store_atomic_i8_unaligned_release(i8 %value, ptr %ptr) { ; CHECK-LABEL: store_atomic_i8_unaligned_release: -; CHECK: add x8, x1, #4 -; CHECK: stlrb w0, [x8] +; CHECK: stlurb w0, [x1, #4] %gep = getelementptr inbounds i8, ptr %ptr, i32 4 store atomic i8 %value, ptr %gep release, align 1 ret void @@ -200,8 +191,7 @@ define void @store_atomic_i8_unaligned_release(i8 %value, ptr %ptr) { define void @store_atomic_i8_unaligned_seq_cst(i8 %value, ptr %ptr) { ; CHECK-LABEL: store_atomic_i8_unaligned_seq_cst: -; CHECK: add x8, x1, #4 -; CHECK: stlrb w0, [x8] +; CHECK: stlurb w0, [x1, #4] %gep = getelementptr inbounds i8, ptr %ptr, i32 4 store atomic i8 %value, ptr %gep seq_cst, align 1 ret void

From 896749aa0d420ae573255a64a349bc2a76cfed37 Mon Sep 17 00:00:00 2001
From: Jon Chesterfield
Date: Mon, 30 Oct 2023 18:35:52 +0000
Subject: [PATCH 060/144] [amdgpu][openmp] Avoiding writing to packet header twice (#70695)

I think it follows from the HSA spec that a write to the first byte is deemed
significant to the GPU, in which case writing to the second short and reading
back from it later would be safe. However, the examples for this all involve
an atomic write to the first 32 bits, and it seems a credible risk that the
occasional CI errors about invalid packets have as their root cause that the
firmware notices the early write to packet->setup and treats that as a sign
that the packet is ready to go.

That was overly paranoid; however, in passing I noticed that the code in libc
is genuinely invalid. The memset writes a zero to the header byte, changing it
from type_invalid (1) to type_vendor (0), at which point the GPU is free to
read the 64 byte packet and interpret it as a vendor packet, which is probably
why libc CI periodically errors about invalid packets.

Also a drive-by change to do the atomic store on a uint32_t consistently. I'm
not sure offhand what __atomic_store_n on a uint16_t* and an int resolves to;
it seems better to be unambiguous there.
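To make the fix concrete, the pattern both runtimes converge on is: fill in
every other field of the packet first, then publish header and setup together
with a single 32-bit release store. A minimal sketch of that publication step
(illustrative only, not the committed code; it assumes the standard HSA C API
from hsa/hsa.h and reuses the enum and field names that appear in the diff
below):

  #include <hsa/hsa.h>
  #include <stdint.h>

  // The caller has already written every other field of the packet. While
  // the low 16 bits still read HSA_PACKET_TYPE_INVALID the device must
  // ignore the slot; this store flips header and setup in one shot, so the
  // GPU can never observe a half-initialized packet.
  static void publish_packet(hsa_kernel_dispatch_packet_t *packet,
                             uint16_t setup) {
    uint16_t header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
    header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE;
    header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE;
    uint32_t header_word = (uint32_t)header | ((uint32_t)setup << 16u);
    __atomic_store_n((uint32_t *)packet, header_word, __ATOMIC_RELEASE);
  }

The same shape applies to barrier packets, where the setup bits are simply
zero.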
--- libc/utils/gpu/loader/amdgpu/Loader.cpp | 15 ++++++++------- .../plugins-nextgen/amdgpu/src/rtl.cpp | 16 ++++++++-------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp index 1d0247a6dc5dca..c2a11fd8aab72b 100644 --- a/libc/utils/gpu/loader/amdgpu/Loader.cpp +++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp @@ -222,13 +222,13 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable, // Set up the packet for execution on the device. We currently only launch // with one thread on the device, forcing the rest of the wavefront to be // masked off. - std::memset(packet, 0, sizeof(hsa_kernel_dispatch_packet_t)); - packet->setup = (1 + (params.num_blocks_y * params.num_threads_y != 1) + - (params.num_blocks_z * params.num_threads_z != 1)) - << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + uint16_t setup = (1 + (params.num_blocks_y * params.num_threads_y != 1) + + (params.num_blocks_z * params.num_threads_z != 1)) + << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; packet->workgroup_size_x = params.num_threads_x; packet->workgroup_size_y = params.num_threads_y; packet->workgroup_size_z = params.num_threads_z; + packet->reserved0 = 0; packet->grid_size_x = params.num_blocks_x * params.num_threads_x; packet->grid_size_y = params.num_blocks_y * params.num_threads_y; packet->grid_size_z = params.num_blocks_z * params.num_threads_z; @@ -236,7 +236,7 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable, packet->group_segment_size = group_size; packet->kernel_object = kernel; packet->kernarg_address = args; - + packet->reserved2 = 0; // Create a signal to indicate when this packet has been completed. if (hsa_status_t err = hsa_signal_create(1, 0, nullptr, &packet->completion_signal)) @@ -244,12 +244,13 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable, // Initialize the packet header and set the doorbell signal to begin execution // by the HSA runtime. - uint16_t setup = packet->setup; uint16_t header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) | (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) | (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE); - __atomic_store_n(&packet->header, header | (setup << 16), __ATOMIC_RELEASE); + uint32_t header_word = header | (setup << 16u); + __atomic_store_n((uint32_t *)&packet->header, header_word, + __ATOMIC_RELEASE); hsa_signal_store_relaxed(queue->doorbell_signal, packet_id); // Wait until the kernel has completed execution on the device. Periodically diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index fbecb4963c4abc..71207f767fdcc6 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -649,8 +649,8 @@ struct AMDGPUQueueTy { hsa_kernel_dispatch_packet_t *Packet = acquirePacket(PacketId); assert(Packet && "Invalid packet"); - // The header of the packet is written in the last moment.
- Packet->setup = UINT16_C(1) << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + // The first 32 bits of the packet are written after the other fields + uint16_t Setup = UINT16_C(1) << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; Packet->workgroup_size_x = NumThreads; Packet->workgroup_size_y = 1; Packet->workgroup_size_z = 1; @@ -666,7 +666,7 @@ struct AMDGPUQueueTy { Packet->completion_signal = OutputSignal->get(); // Publish the packet. Do not modify the packet after this point. - publishKernelPacket(PacketId, Packet); + publishKernelPacket(PacketId, Setup, Packet); return Plugin::success(); } @@ -743,17 +743,17 @@ struct AMDGPUQueueTy { /// Publish the kernel packet so that the HSA runtime can start processing /// the kernel launch. Do not modify the packet once this function is called. /// Assumes the queue lock is acquired. - void publishKernelPacket(uint64_t PacketId, + void publishKernelPacket(uint64_t PacketId, uint16_t Setup, hsa_kernel_dispatch_packet_t *Packet) { uint32_t *PacketPtr = reinterpret_cast<uint32_t *>(Packet); - uint16_t Setup = Packet->setup; uint16_t Header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; Header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; Header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; // Publish the packet. Do not modify the packet after this point. - __atomic_store_n(PacketPtr, Header | (Setup << 16), __ATOMIC_RELEASE); + uint32_t HeaderWord = Header | (Setup << 16u); + __atomic_store_n(PacketPtr, HeaderWord, __ATOMIC_RELEASE); // Signal the doorbell about the published packet. hsa_signal_store_relaxed(Queue->doorbell_signal, PacketId); @@ -765,14 +765,14 @@ struct AMDGPUQueueTy { void publishBarrierPacket(uint64_t PacketId, hsa_barrier_and_packet_t *Packet) { uint32_t *PacketPtr = reinterpret_cast<uint32_t *>(Packet); - uint16_t Setup = 0; uint16_t Header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE; Header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; Header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; // Publish the packet. Do not modify the packet after this point. - __atomic_store_n(PacketPtr, Header | (Setup << 16), __ATOMIC_RELEASE); + uint32_t HeaderWord = Header; + __atomic_store_n(PacketPtr, HeaderWord, __ATOMIC_RELEASE); // Signal the doorbell about the published packet. hsa_signal_store_relaxed(Queue->doorbell_signal, PacketId); From 89564f0b69ac935dc3b865e4d627f3a0610b4abd Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 30 Oct 2023 11:33:05 -0700 Subject: [PATCH 061/144] Regenerate a set of auto-update tests [nfc] To reduce the spurious test delta in an upcoming change.
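These CHECK lines are produced mechanically by the in-tree update scripts rather than by hand; a regeneration along the following lines would reproduce them (assuming a local build whose opt binary lives at build/bin/opt; that path is an assumption):

  llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
    llvm/test/Transforms/InstCombine/adjust-for-minmax.ll \
    llvm/test/Transforms/InstCombine/cast-mul-select.ll \
    llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll \
    llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll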
--- .../InstCombine/adjust-for-minmax.ll | 64 +++++------ .../Transforms/InstCombine/cast-mul-select.ll | 108 ++++++++++++++++-- .../SLPVectorizer/AArch64/getelementptr.ll | 58 +++++----- .../AMDGPU/uniform-unswitch.ll | 4 +- 4 files changed, 164 insertions(+), 70 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll index 703ba52fa378f6..67871f3d64c411 100644 --- a/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll +++ b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll @@ -245,8 +245,8 @@ define <2 x i32> @umin4_vec(<2 x i32> %n) { define i64 @smax_sext(i32 %a) { ; CHECK-LABEL: @smax_sext( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 0) -; CHECK-NEXT: [[MAX:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 0) +; CHECK-NEXT: [[MAX:%.*]] = zext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[MAX]] ; %a_ext = sext i32 %a to i64 @@ -257,8 +257,8 @@ define i64 @smax_sext(i32 %a) { define <2 x i64> @smax_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @smax_sext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> zeroinitializer) -; CHECK-NEXT: [[MAX:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> zeroinitializer) +; CHECK-NEXT: [[MAX:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MAX]] ; %a_ext = sext <2 x i32> %a to <2 x i64> @@ -269,8 +269,8 @@ define <2 x i64> @smax_sext_vec(<2 x i32> %a) { define i64 @smin_sext(i32 %a) { ; CHECK-LABEL: @smin_sext( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[A:%.*]], i32 0) -; CHECK-NEXT: [[MIN:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = call i32 @llvm.smin.i32(i32 [[A:%.*]], i32 0) +; CHECK-NEXT: [[MIN:%.*]] = sext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[MIN]] ; %a_ext = sext i32 %a to i64 @@ -281,8 +281,8 @@ define i64 @smin_sext(i32 %a) { define <2 x i64>@smin_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @smin_sext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> zeroinitializer) -; CHECK-NEXT: [[MIN:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> zeroinitializer) +; CHECK-NEXT: [[MIN:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; %a_ext = sext <2 x i32> %a to <2 x i64> @@ -293,8 +293,8 @@ define <2 x i64>@smin_sext_vec(<2 x i32> %a) { define i64 @umax_sext(i32 %a) { ; CHECK-LABEL: @umax_sext( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 3) -; CHECK-NEXT: [[MAX:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 3) +; CHECK-NEXT: [[MAX:%.*]] = sext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[MAX]] ; %a_ext = sext i32 %a to i64 @@ -305,8 +305,8 @@ define i64 @umax_sext(i32 %a) { define <2 x i64> @umax_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umax_sext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 3, i32 3>) -; CHECK-NEXT: [[MAX:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 3, i32 3>) +; CHECK-NEXT: [[MAX:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MAX]] ; %a_ext = sext <2 x i32> %a to <2 x i64> @@ -317,8 +317,8 @@ define <2 x i64> @umax_sext_vec(<2 x i32> %a) { define i64 @umin_sext(i32 %a) { ; CHECK-LABEL: @umin_sext( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 2) -; CHECK-NEXT: [[MIN:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 2) +; CHECK-NEXT: [[MIN:%.*]] = zext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[MIN]] ; %a_ext = sext i32 %a to i64 @@ -329,8 +329,8 @@ define i64 @umin_sext(i32 %a) { define <2 x i64> @umin_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_sext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 2, i32 2>) -; CHECK-NEXT: [[MIN:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 2, i32 2>) +; CHECK-NEXT: [[MIN:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; %a_ext = sext <2 x i32> %a to <2 x i64> @@ -341,8 +341,8 @@ define <2 x i64> @umin_sext_vec(<2 x i32> %a) { define i64 @umax_sext2(i32 %a) { ; CHECK-LABEL: @umax_sext2( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 2) -; CHECK-NEXT: [[MIN:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 2) +; CHECK-NEXT: [[MIN:%.*]] = sext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[MIN]] ; %a_ext = sext i32 %a to i64 @@ -353,8 +353,8 @@ define i64 @umax_sext2(i32 %a) { define <2 x i64> @umax_sext2_vec(<2 x i32> %a) { ; CHECK-LABEL: @umax_sext2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 2, i32 2>) -; CHECK-NEXT: [[MIN:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 2, i32 2>) +; CHECK-NEXT: [[MIN:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; %a_ext = sext <2 x i32> %a to <2 x i64> @@ -365,8 +365,8 @@ define <2 x i64> @umax_sext2_vec(<2 x i32> %a) { define i64 @umin_sext2(i32 %a) { ; CHECK-LABEL: @umin_sext2( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 3) -; CHECK-NEXT: [[MIN:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 3) +; CHECK-NEXT: [[MIN:%.*]] = zext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[MIN]] ; %a_ext = sext i32 %a to i64 @@ -377,8 +377,8 @@ define i64 @umin_sext2(i32 %a) { define <2 x i64> @umin_sext2_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_sext2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 3, i32 3>) -; CHECK-NEXT: [[MIN:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 3, i32 3>) +; CHECK-NEXT: [[MIN:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; %a_ext = sext <2 x i32> %a to <2 x i64> @@ -389,8 +389,8 @@ define <2 x i64> @umin_sext2_vec(<2 x i32> %a) { define i64 @umax_zext(i32 %a) { ; CHECK-LABEL: @umax_zext( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 3) -; CHECK-NEXT: [[MAX:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 3) +; CHECK-NEXT: [[MAX:%.*]] = zext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[MAX]] ; %a_ext = zext i32 %a to i64 @@ -401,8 +401,8 @@ define i64 @umax_zext(i32 %a) { define <2 x i64> @umax_zext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umax_zext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 3, i32 3>) -; CHECK-NEXT: [[MAX:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 3, i32 3>) +; CHECK-NEXT: [[MAX:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MAX]] ; %a_ext = zext <2 x i32> %a to <2 x i64> @@ -413,8 +413,8 @@ define <2 x i64> @umax_zext_vec(<2 x i32> %a) { define i64 @umin_zext(i32 %a) { ; CHECK-LABEL: @umin_zext( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 2) -; CHECK-NEXT: [[MIN:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[NARROW:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 2) +; CHECK-NEXT: [[MIN:%.*]] = zext i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[MIN]] ; %a_ext = zext i32 %a to i64 @@ -425,8 +425,8 @@ define i64 @umin_zext(i32 %a) { define <2 x i64> @umin_zext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_zext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 2, i32 2>) -; CHECK-NEXT: [[MIN:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 2, i32 2>) +; CHECK-NEXT: [[MIN:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; %a_ext = zext <2 x i32> %a to <2 x i64> diff --git a/llvm/test/Transforms/InstCombine/cast-mul-select.ll b/llvm/test/Transforms/InstCombine/cast-mul-select.ll index 8c06e556de48a0..ab8333beb9e766 100644 --- a/llvm/test/Transforms/InstCombine/cast-mul-select.ll +++ b/llvm/test/Transforms/InstCombine/cast-mul-select.ll @@ -9,17 +9,20 @@ define i32 @mul(i32 %x, i32 %y) { ; CHECK-NEXT: [[C:%.*]] = mul i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[D:%.*]] = and i32 [[C]], 255 ; CHECK-NEXT: ret i32 [[D]] +; +; DBGINFO-LABEL: @mul( +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[X:%.*]], metadata [[META9:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG15:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[Y:%.*]], metadata [[META11:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG16:![0-9]+]] +; DBGINFO-NEXT: [[C:%.*]] = mul i32 [[X]], [[Y]], !dbg [[DBG17:![0-9]+]] +; DBGINFO-NEXT: [[D:%.*]] = and i32 [[C]], 255, !dbg [[DBG18:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[C]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[D]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18]] +; DBGINFO-NEXT: ret i32 [[D]], !dbg [[DBG19:![0-9]+]] +; ; Test that when zext is evaluated in different type ; we preserve the debug information in the resulting ; instruction.
-; DBGINFO-LABEL: @mul( -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 %x, {{.*}} !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)) -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 %y, {{.*}} !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)) -; DBGINFO-NEXT: [[C:%.*]] = mul i32 {{.*}} -; DBGINFO-NEXT: [[D:%.*]] = and i32 {{.*}} -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[C]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[D]] %A = trunc i32 %x to i8 %B = trunc i32 %y to i8 @@ -34,6 +37,18 @@ define i32 @select1(i1 %cond, i32 %x, i32 %y, i32 %z) { ; CHECK-NEXT: [[E:%.*]] = select i1 [[COND:%.*]], i32 [[Z:%.*]], i32 [[D]] ; CHECK-NEXT: [[F:%.*]] = and i32 [[E]], 255 ; CHECK-NEXT: ret i32 [[F]] +; +; DBGINFO-LABEL: @select1( +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[X:%.*]], metadata [[META22:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG28:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[Y:%.*]], metadata [[META23:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG29:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[Z:%.*]], metadata [[META24:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG30:![0-9]+]] +; DBGINFO-NEXT: [[D:%.*]] = add i32 [[X]], [[Y]], !dbg [[DBG31:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata !DIArgList(i32 [[X]], i32 [[Y]]), metadata [[META25:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_plus, DW_OP_stack_value)), !dbg [[DBG31]] +; DBGINFO-NEXT: [[E:%.*]] = select i1 [[COND:%.*]], i32 [[Z]], i32 [[D]], !dbg [[DBG32:![0-9]+]] +; DBGINFO-NEXT: [[F:%.*]] = and i32 [[E]], 255, !dbg [[DBG33:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[E]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[F]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG33]] +; DBGINFO-NEXT: ret i32 [[F]], !dbg [[DBG34:![0-9]+]] ; %A = trunc i32 %x to i8 %B = trunc i32 %y to i8 @@ -49,6 +64,17 @@ define i8 @select2(i1 %cond, i8 %x, i8 %y, i8 %z) { ; CHECK-NEXT: [[D:%.*]] = add i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[E:%.*]] = select i1 [[COND:%.*]], i8 [[Z:%.*]], i8 [[D]] ; CHECK-NEXT: ret i8 [[E]] +; +; DBGINFO-LABEL: @select2( +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i8 [[X:%.*]], metadata [[META37:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG43:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i8 [[Y:%.*]], metadata [[META38:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG44:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i8 [[Z:%.*]], metadata [[META39:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_unsigned, 
DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG45:![0-9]+]] +; DBGINFO-NEXT: [[D:%.*]] = add i8 [[X]], [[Y]], !dbg [[DBG46:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata !DIArgList(i8 [[X]], i8 [[Y]]), metadata [[META40:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_plus, DW_OP_stack_value)), !dbg [[DBG46]] +; DBGINFO-NEXT: [[E:%.*]] = select i1 [[COND:%.*]], i8 [[Z]], i8 [[D]], !dbg [[DBG47:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 poison, metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i8 [[E]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] +; DBGINFO-NEXT: ret i8 [[E]], !dbg [[DBG49:![0-9]+]] ; %A = zext i8 %x to i32 %B = zext i8 %y to i32 @@ -69,6 +95,17 @@ define i32 @eval_trunc_multi_use_in_one_inst(i32 %x) { ; CHECK-NEXT: [[M:%.*]] = mul i64 [[A]], [[A]] ; CHECK-NEXT: [[T:%.*]] = trunc i64 [[M]] to i32 ; CHECK-NEXT: ret i32 [[T]] +; +; DBGINFO-LABEL: @eval_trunc_multi_use_in_one_inst( +; DBGINFO-NEXT: [[Z:%.*]] = zext i32 [[X:%.*]] to i64, !dbg [[DBG57:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i64 [[Z]], metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG57]] +; DBGINFO-NEXT: [[A:%.*]] = add nuw nsw i64 [[Z]], 15, !dbg [[DBG58:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i64 [[A]], metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58]] +; DBGINFO-NEXT: [[M:%.*]] = mul i64 [[A]], [[A]], !dbg [[DBG59:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i64 [[M]], metadata [[META55:![0-9]+]], metadata !DIExpression()), !dbg [[DBG59]] +; DBGINFO-NEXT: [[T:%.*]] = trunc i64 [[M]] to i32, !dbg [[DBG60:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[T]], metadata [[META56:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +; DBGINFO-NEXT: ret i32 [[T]], !dbg [[DBG61:![0-9]+]] ; %z = zext i32 %x to i64 %a = add nsw nuw i64 %z, 15 @@ -84,6 +121,17 @@ define i32 @eval_zext_multi_use_in_one_inst(i32 %x) { ; CHECK-NEXT: [[M:%.*]] = mul nuw nsw i16 [[A]], [[A]] ; CHECK-NEXT: [[R:%.*]] = zext i16 [[M]] to i32 ; CHECK-NEXT: ret i32 [[R]] +; +; DBGINFO-LABEL: @eval_zext_multi_use_in_one_inst( +; DBGINFO-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i16, !dbg [[DBG69:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 [[T]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] +; DBGINFO-NEXT: [[A:%.*]] = and i16 [[T]], 5, !dbg [[DBG70:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 [[A]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG70]] +; DBGINFO-NEXT: [[M:%.*]] = mul nuw nsw i16 [[A]], [[A]], !dbg [[DBG71:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 [[M]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG71]] +; DBGINFO-NEXT: [[R:%.*]] = zext i16 [[M]] to i32, !dbg [[DBG72:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[R]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG72]] +; DBGINFO-NEXT: ret i32 [[R]], !dbg [[DBG73:![0-9]+]] ; %t = trunc i32 %x to i16 %a = and i16 %t, 5 @@ -100,6 +148,19 @@ define i32 @eval_sext_multi_use_in_one_inst(i32 %x) { ; CHECK-NEXT: [[O:%.*]] = or i16 [[M]], -32768 ; 
CHECK-NEXT: [[R:%.*]] = sext i16 [[O]] to i32 ; CHECK-NEXT: ret i32 [[R]] +; +; DBGINFO-LABEL: @eval_sext_multi_use_in_one_inst( +; DBGINFO-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i16, !dbg [[DBG81:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 [[T]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG81]] +; DBGINFO-NEXT: [[A:%.*]] = and i16 [[T]], 14, !dbg [[DBG82:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 [[A]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82]] +; DBGINFO-NEXT: [[M:%.*]] = mul nuw nsw i16 [[A]], [[A]], !dbg [[DBG83:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 [[M]], metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG83]] +; DBGINFO-NEXT: [[O:%.*]] = or i16 [[M]], -32768, !dbg [[DBG84:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 [[O]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG84]] +; DBGINFO-NEXT: [[R:%.*]] = sext i16 [[O]] to i32, !dbg [[DBG85:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[R]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85]] +; DBGINFO-NEXT: ret i32 [[R]], !dbg [[DBG86:![0-9]+]] ; %t = trunc i32 %x to i16 %a = and i16 %t, 14 @@ -140,6 +201,39 @@ define void @PR36225(i32 %a, i32 %b, i1 %c1, i3 %v1, i3 %v2) { ; CHECK: exit: ; CHECK-NEXT: unreachable ; +; DBGINFO-LABEL: @PR36225( +; DBGINFO-NEXT: entry: +; DBGINFO-NEXT: br label [[WHILE_BODY:%.*]], !dbg [[DBG94:![0-9]+]] +; DBGINFO: while.body: +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[B:%.*]], metadata [[META89:![0-9]+]], metadata !DIExpression(DW_OP_constu, 0, DW_OP_eq, DW_OP_stack_value)), !dbg [[DBG95:![0-9]+]] +; DBGINFO-NEXT: br i1 [[C1:%.*]], label [[FOR_BODY3_US:%.*]], label [[FOR_BODY3:%.*]], !dbg [[DBG96:![0-9]+]] +; DBGINFO: for.body3.us: +; DBGINFO-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[B]], 0, !dbg [[DBG95]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i1 [[TOBOOL]], metadata [[META89]], metadata !DIExpression()), !dbg [[DBG95]] +; DBGINFO-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i8 0, i8 4, !dbg [[DBG97:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i8 [[SPEC_SELECT]], metadata [[META90:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97]] +; DBGINFO-NEXT: switch i3 [[V1:%.*]], label [[EXIT:%.*]] [ +; DBGINFO-NEXT: i3 0, label [[FOR_END:%.*]] +; DBGINFO-NEXT: i3 -1, label [[FOR_END]] +; DBGINFO-NEXT: ], !dbg [[DBG98:![0-9]+]] +; DBGINFO: for.body3: +; DBGINFO-NEXT: switch i3 [[V2:%.*]], label [[EXIT]] [ +; DBGINFO-NEXT: i3 0, label [[FOR_END]] +; DBGINFO-NEXT: i3 -1, label [[FOR_END]] +; DBGINFO-NEXT: ], !dbg [[DBG99:![0-9]+]] +; DBGINFO: for.end: +; DBGINFO-NEXT: [[H:%.*]] = phi i8 [ [[SPEC_SELECT]], [[FOR_BODY3_US]] ], [ [[SPEC_SELECT]], [[FOR_BODY3_US]] ], [ 0, [[FOR_BODY3]] ], [ 0, [[FOR_BODY3]] ], !dbg [[DBG100:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i8 [[H]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG100]] +; DBGINFO-NEXT: [[CONV:%.*]] = zext i8 [[H]] to i32, !dbg [[DBG101:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[CONV]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG101]] +; DBGINFO-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], [[A:%.*]], !dbg [[DBG102:![0-9]+]] +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]], metadata [[META93:![0-9]+]], metadata !DIExpression()), !dbg [[DBG102]] +; DBGINFO-NEXT: br i1 [[CMP]], label [[EXIT]], label 
[[EXIT2:%.*]], !dbg [[DBG103:![0-9]+]] +; DBGINFO: exit2: +; DBGINFO-NEXT: unreachable, !dbg [[DBG104:![0-9]+]] +; DBGINFO: exit: +; DBGINFO-NEXT: unreachable, !dbg [[DBG105:![0-9]+]] +; entry: br label %while.body diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll index c36c8c27a8ef7b..9a55e1eee5bd40 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll @@ -234,42 +234,42 @@ define void @test_i16_extend(ptr %p.1, ptr %p.2, i32 %idx.i32) { ; CHECK-NEXT: [[IDX_0:%.*]] = zext i32 [[IDX_I32:%.*]] to i64 ; CHECK-NEXT: [[T53:%.*]] = getelementptr inbounds i16, ptr [[P_1:%.*]], i64 [[IDX_0]] ; CHECK-NEXT: [[T56:%.*]] = getelementptr inbounds i16, ptr [[P_2:%.*]], i64 [[IDX_0]] -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[T53]], align 2 -; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr [[T56]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i16> [[TMP5]] to <8 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <8 x i32> [[TMP3]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 -; CHECK-NEXT: [[T60:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[T53]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr [[T56]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[TMP3]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[T60:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP7]] ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[T60]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP7]], i64 1 -; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 -; CHECK-NEXT: [[T71:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP5]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[T71:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP9]] ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[T71]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP7]], i64 2 -; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[T82:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP5]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 +; CHECK-NEXT: [[T82:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP11]] ; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[T82]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP7]], i64 3 -; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[T93:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP5]], i64 3 +; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[T93:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP13]] ; CHECK-NEXT: [[L_4:%.*]] = load i32, ptr [[T93]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP7]], i64 4 -; CHECK-NEXT: [[TMP17:%.*]] = sext 
i32 [[TMP16]] to i64 -; CHECK-NEXT: [[T104:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP5]], i64 4 +; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 +; CHECK-NEXT: [[T104:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP15]] ; CHECK-NEXT: [[L_5:%.*]] = load i32, ptr [[T104]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP7]], i64 5 -; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP18]] to i64 -; CHECK-NEXT: [[T115:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP5]], i64 5 +; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 +; CHECK-NEXT: [[T115:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP17]] ; CHECK-NEXT: [[L_6:%.*]] = load i32, ptr [[T115]], align 4 -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP7]], i64 6 -; CHECK-NEXT: [[TMP21:%.*]] = sext i32 [[TMP20]] to i64 -; CHECK-NEXT: [[T126:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP5]], i64 6 +; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP18]] to i64 +; CHECK-NEXT: [[T126:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP19]] ; CHECK-NEXT: [[L_7:%.*]] = load i32, ptr [[T126]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP7]], i64 7 -; CHECK-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 -; CHECK-NEXT: [[T137:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP23]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP5]], i64 7 +; CHECK-NEXT: [[TMP21:%.*]] = sext i32 [[TMP20]] to i64 +; CHECK-NEXT: [[T137:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP21]] ; CHECK-NEXT: [[L_8:%.*]] = load i32, ptr [[T137]], align 4 ; CHECK-NEXT: call void @use(i32 [[L_1]], i32 [[L_2]], i32 [[L_3]], i32 [[L_4]], i32 [[L_5]], i32 [[L_6]], i32 [[L_7]], i32 [[L_8]]) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll index 8d1b0c7177be4b..cbbf4d6e7be195 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll @@ -19,8 +19,8 @@ ; SHOULDBE-NEXT: br i1 define amdgpu_kernel void @uniform_unswitch(ptr nocapture %out, i32 %n, i32 %x) { -; CHECK-LABEL: define amdgpu_kernel void @uniform_unswitch -; CHECK-SAME: (ptr nocapture writeonly [[OUT:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-LABEL: define amdgpu_kernel void @uniform_unswitch( +; CHECK-SAME: ptr nocapture writeonly [[OUT:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OUT_GLOBAL:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) ; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0 From 849f963e3139d79eba9989554a299ec6a1a16b10 Mon Sep 17 00:00:00 2001 From: Igor Kirillov Date: Mon, 30 Oct 2023 18:40:48 +0000 Subject: [PATCH 062/144] [CodeGen] Improve ExpandMemCmp for more efficient non-register aligned sizes handling (#70469) * Enhanced the logic of ExpandMemCmp pass to merge contiguous subsequences in LoadSequence, based on sizes allowed in `AllowedTailExpansions`. * This enhancement seeks to minimize the number of basic blocks and produce optimized code when using memcmp with non-register aligned sizes. 
* Enable this feature for AArch64 with memcmp sizes modulo 8 equal to 3, 5, and 6. Reapplication of #69942 after fixing a bug --- .../llvm/Analysis/TargetTransformInfo.h | 11 + llvm/lib/CodeGen/ExpandMemCmp.cpp | 95 +- .../AArch64/AArch64TargetTransformInfo.cpp | 1 + llvm/test/CodeGen/AArch64/memcmp.ll | 3005 +++++++++++++++++ .../Transforms/ExpandMemCmp/AArch64/memcmp.ll | 881 +++++ 5 files changed, 3973 insertions(+), 20 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/memcmp.ll create mode 100644 llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 5234ef8788d9e9..3ec80d99b392b2 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -907,6 +907,17 @@ class TargetTransformInfo { // be done with two 4-byte compares instead of 4+2+1-byte compares. This // requires all loads in LoadSizes to be doable in an unaligned way. bool AllowOverlappingLoads = false; + + // Sometimes, the amount of data that needs to be compared is smaller than + // the standard register size, but it cannot be loaded with just one load + // instruction. For example, if the size of the memory comparison is 6 + // bytes, we can handle it more efficiently by loading all 6 bytes in a + // single block and generating an 8-byte number, instead of generating two + // separate blocks with conditional jumps for 4 and 2 byte loads. This + // approach simplifies the process and produces the comparison result as + // normal. This array lists the allowed sizes of memcmp tails that can be + // merged into one block + SmallVector<unsigned, 4> AllowedTailExpansions; }; MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index 911ebd41afc5b9..28e258be226a69 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -117,8 +117,8 @@ class MemCmpExpansion { Value *Lhs = nullptr; Value *Rhs = nullptr; }; - LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType, - unsigned OffsetBytes); + LoadPair getLoadPair(Type *LoadSizeType, Type *BSwapSizeType, + Type *CmpSizeType, unsigned OffsetBytes); static LoadEntryVector computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes, @@ -128,6 +128,11 @@ class MemCmpExpansion { unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte); + static void optimiseLoadSequence( + LoadEntryVector &LoadSequence, + const TargetTransformInfo::MemCmpExpansionOptions &Options, + bool IsUsedForZeroCmp); + public: MemCmpExpansion(CallInst *CI, uint64_t Size, const TargetTransformInfo::MemCmpExpansionOptions &Options, @@ -210,6 +215,37 @@ MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size, return LoadSequence; } +void MemCmpExpansion::optimiseLoadSequence( + LoadEntryVector &LoadSequence, + const TargetTransformInfo::MemCmpExpansionOptions &Options, + bool IsUsedForZeroCmp) { + // This part of code attempts to optimize the LoadSequence by merging allowed + // subsequences into single loads of allowed sizes from + // `MemCmpExpansionOptions::AllowedTailExpansions`. If it is for zero + // comparison or if no allowed tail expansions are specified, we exit early.
+ if (IsUsedForZeroCmp || Options.AllowedTailExpansions.empty()) + return; + + while (LoadSequence.size() >= 2) { + auto Last = LoadSequence[LoadSequence.size() - 1]; + auto PreLast = LoadSequence[LoadSequence.size() - 2]; + + // Exit the loop if the two sequences are not contiguous + if (PreLast.Offset + PreLast.LoadSize != Last.Offset) + break; + + auto LoadSize = Last.LoadSize + PreLast.LoadSize; + if (find(Options.AllowedTailExpansions, LoadSize) == + Options.AllowedTailExpansions.end()) + break; + + // Remove the last two sequences and replace with the combined sequence + LoadSequence.pop_back(); + LoadSequence.pop_back(); + LoadSequence.emplace_back(PreLast.Offset, LoadSize); + } +} + // Initialize the basic block structure required for expansion of memcmp call // with given maximum load size and memcmp size parameter. // This structure includes: @@ -255,6 +291,7 @@ MemCmpExpansion::MemCmpExpansion( } } assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant"); + optimiseLoadSequence(LoadSequence, Options, IsUsedForZeroCmp); } unsigned MemCmpExpansion::getNumBlocks() { @@ -278,7 +315,7 @@ void MemCmpExpansion::createResultBlock() { } MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType, - bool NeedsBSwap, + Type *BSwapSizeType, Type *CmpSizeType, unsigned OffsetBytes) { // Get the memory source at offset `OffsetBytes`. @@ -307,16 +344,22 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType, if (!Rhs) Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign); + // Zero extend if Byte Swap intrinsic has different type + if (BSwapSizeType && LoadSizeType != BSwapSizeType) { + Lhs = Builder.CreateZExt(Lhs, BSwapSizeType); + Rhs = Builder.CreateZExt(Rhs, BSwapSizeType); + } + // Swap bytes if required. - if (NeedsBSwap) { - Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), - Intrinsic::bswap, LoadSizeType); + if (BSwapSizeType) { + Function *Bswap = Intrinsic::getDeclaration( + CI->getModule(), Intrinsic::bswap, BSwapSizeType); Lhs = Builder.CreateCall(Bswap, Lhs); Rhs = Builder.CreateCall(Bswap, Rhs); } // Zero extend if required. - if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) { + if (CmpSizeType != nullptr && CmpSizeType != Lhs->getType()) { Lhs = Builder.CreateZExt(Lhs, CmpSizeType); Rhs = Builder.CreateZExt(Rhs, CmpSizeType); } @@ -332,7 +375,7 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, BasicBlock *BB = LoadCmpBlocks[BlockIndex]; Builder.SetInsertPoint(BB); const LoadPair Loads = - getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false, + getLoadPair(Type::getInt8Ty(CI->getContext()), nullptr, Type::getInt32Ty(CI->getContext()), OffsetBytes); Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs); @@ -385,11 +428,12 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, IntegerType *const MaxLoadType = NumLoads == 1 ? 
nullptr : IntegerType::get(CI->getContext(), MaxLoadSize * 8); + for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) { const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex]; const LoadPair Loads = getLoadPair( - IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), - /*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset); + IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), nullptr, + MaxLoadType, CurLoadEntry.Offset); if (NumLoads != 1) { // If we have multiple loads per block, we need to generate a composite @@ -475,14 +519,20 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { Type *LoadSizeType = IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); - Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + Type *BSwapSizeType = + DL.isLittleEndian() + ? IntegerType::get(CI->getContext(), + PowerOf2Ceil(CurLoadEntry.LoadSize * 8)) + : nullptr; + Type *MaxLoadType = IntegerType::get( + CI->getContext(), + std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8); assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type"); Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - const LoadPair Loads = - getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType, - CurLoadEntry.Offset); + const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType, + CurLoadEntry.Offset); // Add the loaded values to the phi nodes for calculating memcmp result only // if result is not used in a zero equality. @@ -587,19 +637,24 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { /// A memcmp expansion that only has one block of load and compare can bypass /// the compare, branch, and phi IR that is required in the general case. Value *MemCmpExpansion::getMemCmpOneBlock() { - Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); bool NeedsBSwap = DL.isLittleEndian() && Size != 1; + Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); + Type *BSwapSizeType = + NeedsBSwap ? IntegerType::get(CI->getContext(), PowerOf2Ceil(Size * 8)) + : nullptr; + Type *MaxLoadType = + IntegerType::get(CI->getContext(), + std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(Size)) * 8); // The i8 and i16 cases don't need compares. We zext the loaded values and // subtract them to get the suitable negative, zero, or positive i32 result. - if (Size < 4) { - const LoadPair Loads = - getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(), - /*Offset*/ 0); + if (Size == 1 || Size == 2) { + const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, + Builder.getInt32Ty(), /*Offset*/ 0); return Builder.CreateSub(Loads.Lhs, Loads.Rhs); } - const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType, + const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType, /*Offset*/ 0); // The result of memcmp is negative, zero, or positive, so produce that by // subtracting 2 extended compare bits: sub (ugt, ult). diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 6bbd7009e2378a..776619c90393c0 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2994,6 +2994,7 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { // they may wake up the FP unit, which raises the power consumption. Perhaps // they could be used with no holds barred (-O3). 
Options.LoadSizes = {8, 4, 2, 1}; + Options.AllowedTailExpansions = {3, 5, 6}; return Options; } diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll new file mode 100644 index 00000000000000..d13a416a28761c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/memcmp.ll @@ -0,0 +1,3005 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 + +declare dso_local i32 @memcmp(ptr, ptr, i64) + +define i32 @length0(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length0: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + ret i32 %m + } + +define i1 @length0_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length0_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length0_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length0_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length2(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: sub w0, w8, w9, lsr #16 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + ret i32 %m +} + +define i32 @length2_const(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2_const: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w9, [x0] +; CHECK-NEXT: mov w8, #-12594 // =0xffffcece +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: add w0, w8, w9, lsr #16 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + ret i32 %m +} + +define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2_gt_const: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w9, [x0] +; CHECK-NEXT: mov w8, #-12594 // =0xffffcece +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: add w8, w8, w9, lsr #16 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length2_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh 
w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: sub w8, w8, w9, lsr #16 +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_gt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: sub w8, w8, w9, lsr #16 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length2_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: mov w9, #12849 // =0x3231 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2_eq_nobuiltin_attr: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #2 // =0x2 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length3(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length3: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrb w8, [x0, #2] +; CHECK-NEXT: ldrh w9, [x0] +; CHECK-NEXT: ldrb w10, [x1, #2] +; CHECK-NEXT: ldrh w11, [x1] +; CHECK-NEXT: orr w8, w9, w8, lsl #16 +; CHECK-NEXT: orr w9, w11, w10, lsl #16 +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo +; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length3_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: ldrb w10, [x0, #2] +; CHECK-NEXT: ldrb w11, [x1, #2] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: ccmp w10, w11, #0, eq +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length4: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo +; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length4_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length4_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length4_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: rev w8, w8 +; 
CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo +; CHECK-NEXT: sub w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length4_gt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length4_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo +; CHECK-NEXT: sub w8, w8, w9 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length4_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length4_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #12849 // =0x3231 +; CHECK-NEXT: movk w9, #13363, lsl #16 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length5(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length5: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrb w8, [x0, #4] +; CHECK-NEXT: ldr w9, [x0] +; CHECK-NEXT: ldrb w10, [x1, #4] +; CHECK-NEXT: ldr w11, [x1] +; CHECK-NEXT: orr x8, x9, x8, lsl #32 +; CHECK-NEXT: orr x9, x11, x10, lsl #32 +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo +; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length5_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: ldrb w10, [x0, #4] +; CHECK-NEXT: ldrb w11, [x1, #4] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: ccmp w10, w11, #0, eq +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length5_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length5_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrb w8, [x0, #4] +; CHECK-NEXT: ldr w9, [x0] +; CHECK-NEXT: ldrb w10, [x1, #4] +; CHECK-NEXT: ldr w11, [x1] +; CHECK-NEXT: orr x8, x9, x8, lsl #32 +; CHECK-NEXT: orr x9, x11, x10, lsl #32 +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo +; CHECK-NEXT: sub w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length6(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length6: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0, #4] +; CHECK-NEXT: ldr w9, [x0] +; CHECK-NEXT: ldrh w10, [x1, #4] +; CHECK-NEXT: ldr w11, [x1] +; CHECK-NEXT: orr x8, x9, x8, lsl #32 +; CHECK-NEXT: orr x9, x11, x10, lsl #32 +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo +; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind + ret i32 %m +} + +define i32 @length7(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length7: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: 
rev w8, w8
+; CHECK-NEXT: rev w9, w9
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: b.ne .LBB22_3
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldur w8, [x0, #3]
+; CHECK-NEXT: ldur w9, [x1, #3]
+; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: rev w9, w9
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: b.ne .LBB22_3
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB22_3: // %res_block
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
+ ret i32 %m
+}
+
+define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length7_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: ldr w9, [x1]
+; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: rev w9, w9
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: b.ne .LBB23_3
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldur w8, [x0, #3]
+; CHECK-NEXT: ldur w9, [x1, #3]
+; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: rev w9, w9
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: b.ne .LBB23_3
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: lsr w0, wzr, #31
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB23_3: // %res_block
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: lsr w0, w8, #31
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length7_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length7_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: ldr w9, [x1]
+; CHECK-NEXT: ldur w10, [x0, #3]
+; CHECK-NEXT: ldur w11, [x1, #3]
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: ccmp w10, w11, #0, eq
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length8(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: cset w8, hi
+; CHECK-NEXT: cset w9, lo
+; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ ret i32 %m
+}
+
+define i1 @length8_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length8_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length8_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: length8_eq_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x9, #12592 // =0x3130
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: movk x9, #13106, lsl #16
+; CHECK-NEXT: movk x9, #13620, lsl #32
+; CHECK-NEXT: movk x9, #14134, lsl #48
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length9(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length9:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB28_2
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldrb w8, [x0, #8]
+; CHECK-NEXT: ldrb w9, [x1, #8]
+; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB28_2: // %res_block
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind
+ ret i32 %m
+}
+
+define i1 @length9_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length9_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: ldrb w10, [x0, #8]
+; CHECK-NEXT: ldrb w11, [x1, #8]
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length10(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length10:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB30_3
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldrh w8, [x0, #8]
+; CHECK-NEXT: ldrh w9, [x1, #8]
+; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: rev w9, w9
+; CHECK-NEXT: lsr w8, w8, #16
+; CHECK-NEXT: lsr w9, w9, #16
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB30_3
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB30_3: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind
+ ret i32 %m
+}
+
+define i1 @length10_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length10_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: ldrh w10, [x0, #8]
+; CHECK-NEXT: ldrh w11, [x1, #8]
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length11(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length11:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB32_3
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldur x8, [x0, #3]
+; CHECK-NEXT: ldur x9, [x1, #3]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB32_3
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB32_3: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind
+ ret i32 %m
+}
+
+define i1 @length11_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length11_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: ldur x10, [x0, #3]
+; CHECK-NEXT: ldur x11, [x1, #3]
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length12_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length12_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: ldr w10, [x0, #8]
+; CHECK-NEXT: ldr w11, [x1, #8]
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length12(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length12:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB35_3
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr w8, [x0, #8]
+; CHECK-NEXT: ldr w9, [x1, #8]
+; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: rev w9, w9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB35_3
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB35_3: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
+ ret i32 %m
+}
+
+define i1 @length13_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length13_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: ldur x10, [x0, #5]
+; CHECK-NEXT: ldur x11, [x1, #5]
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length14_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length14_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: ldur x10, [x0, #6]
+; CHECK-NEXT: ldur x11, [x1, #6]
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length15(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length15:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB38_3
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldur x8, [x0, #7]
+; CHECK-NEXT: ldur x9, [x1, #7]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB38_3
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB38_3: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
+ ret i32 %m
+}
+
+define i1 @length15_lt(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length15_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB39_3
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldur x8, [x0, #7]
+; CHECK-NEXT: ldur x9, [x1, #7]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB39_3
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: lsr w0, wzr, #31
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB39_3: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: lsr w0, w8, #31
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
+ %c = icmp slt i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length15_const(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length15_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #14136 // =0x3738
+; CHECK-NEXT: ldr x9, [x0]
+; CHECK-NEXT: movk x8, #13622, lsl #16
+; CHECK-NEXT: movk x8, #13108, lsl #32
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: movk x8, #12594, lsl #48
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: b.ne .LBB40_3
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: mov x8, #13365 // =0x3435
+; CHECK-NEXT: ldur x9, [x0, #7]
+; CHECK-NEXT: movk x8, #12851, lsl #16
+; CHECK-NEXT: movk x8, #12337, lsl #32
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: movk x8, #14393, lsl #48
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: b.ne .LBB40_3
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB40_3: // %res_block
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind
+ ret i32 %m
+}
+
+define i1 @length15_eq(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length15_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: ldur x10, [x0, #7]
+; CHECK-NEXT: ldur x11, [x1, #7]
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length15_gt_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #14136 // =0x3738
+; CHECK-NEXT: ldr x9, [x0]
+; CHECK-NEXT: movk x8, #13622, lsl #16
+; CHECK-NEXT: movk x8, #13108, lsl #32
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: movk x8, #12594, lsl #48
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: b.ne .LBB42_3
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: mov x8, #13365 // =0x3435
+; CHECK-NEXT: ldur x9, [x0, #7]
+; CHECK-NEXT: movk x8, #12851, lsl #16
+; CHECK-NEXT: movk x8, #12337, lsl #32
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: movk x8, #14393, lsl #48
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: b.ne .LBB42_3
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: b .LBB42_4
+; CHECK-NEXT: .LBB42_3: // %res_block
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: .LBB42_4: // %endblock
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind
+ %c = icmp sgt i32 %m, 0
+ ret i1 %c
+}
+
+
+define i32 @length16(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB43_3
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB43_3
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB43_3: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind
+ ret i32 %m
+}
+
+define i1 @length16_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length16_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length16_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB45_3
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB45_3
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: lsr w0, wzr, #31
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB45_3: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: lsr w0, w8, #31
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length16_gt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB46_3
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB46_3
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: b .LBB46_4
+; CHECK-NEXT: .LBB46_3: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: .LBB46_4: // %endblock
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length16_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: length16_eq_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #12592 // =0x3130
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: movk x8, #13106, lsl #16
+; CHECK-NEXT: movk x8, #13620, lsl #32
+; CHECK-NEXT: movk x8, #14134, lsl #48
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: mov x8, #14648 // =0x3938
+; CHECK-NEXT: movk x8, #12592, lsl #16
+; CHECK-NEXT: movk x8, #13106, lsl #32
+; CHECK-NEXT: movk x8, #13620, lsl #48
+; CHECK-NEXT: ccmp x10, x8, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+
+define i32 @length24(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length24:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB48_4
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB48_4
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB48_4
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB48_4: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind
+ ret i32 %m
+}
+
+define i1 @length24_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length24_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldr x12, [x0, #16]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldr x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ccmp x12, x13, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length24_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB50_4
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB50_4
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB50_4
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: lsr w0, wzr, #31
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB50_4: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: lsr w0, w8, #31
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length24_gt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB51_4
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB51_4
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB51_4
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: b .LBB51_5
+; CHECK-NEXT: .LBB51_4: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: .LBB51_5: // %endblock
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length24_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: length24_eq_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #12592 // =0x3130
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: movk x8, #13106, lsl #16
+; CHECK-NEXT: ldr x11, [x0, #16]
+; CHECK-NEXT: movk x8, #13620, lsl #32
+; CHECK-NEXT: movk x8, #14134, lsl #48
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: mov x8, #14648 // =0x3938
+; CHECK-NEXT: movk x8, #12592, lsl #16
+; CHECK-NEXT: movk x8, #13106, lsl #32
+; CHECK-NEXT: movk x8, #13620, lsl #48
+; CHECK-NEXT: ccmp x10, x8, #0, eq
+; CHECK-NEXT: mov x8, #14134 // =0x3736
+; CHECK-NEXT: movk x8, #14648, lsl #16
+; CHECK-NEXT: movk x8, #12592, lsl #32
+; CHECK-NEXT: movk x8, #13106, lsl #48
+; CHECK-NEXT: ccmp x11, x8, #0, eq
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length31(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length31:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB53_5
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB53_5
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB53_5
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldur x8, [x0, #23]
+; CHECK-NEXT: ldur x9, [x1, #23]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB53_5
+; CHECK-NEXT: // %bb.4:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB53_5: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind
+ ret i32 %m
+}
+
+define i1 @length31_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length31_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldr x12, [x0, #16]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldr x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ldur x8, [x0, #23]
+; CHECK-NEXT: ldur x9, [x1, #23]
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ccmp x12, x13, #0, eq
+; CHECK-NEXT: ccmp x8, x9, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length31_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB55_5
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB55_5
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB55_5
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldur x8, [x0, #23]
+; CHECK-NEXT: ldur x9, [x1, #23]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB55_5
+; CHECK-NEXT: // %bb.4:
+; CHECK-NEXT: lsr w0, wzr, #31
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB55_5: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: lsr w0, w8, #31
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length31_gt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB56_5
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB56_5
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB56_5
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldur x8, [x0, #23]
+; CHECK-NEXT: ldur x9, [x1, #23]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB56_5
+; CHECK-NEXT: // %bb.4:
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: b .LBB56_6
+; CHECK-NEXT: .LBB56_5: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: .LBB56_6: // %endblock
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; CHECK-LABEL: length31_eq_prefer128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldr x12, [x0, #16]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldr x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ldur x8, [x0, #23]
+; CHECK-NEXT: ldur x9, [x1, #23]
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ccmp x12, x13, #0, eq
+; CHECK-NEXT: ccmp x8, x9, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length31_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: length31_eq_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #12592 // =0x3130
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: movk x8, #13106, lsl #16
+; CHECK-NEXT: ldr x11, [x0, #16]
+; CHECK-NEXT: movk x8, #13620, lsl #32
+; CHECK-NEXT: movk x8, #14134, lsl #48
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: mov x8, #14648 // =0x3938
+; CHECK-NEXT: ldur x9, [x0, #23]
+; CHECK-NEXT: movk x8, #12592, lsl #16
+; CHECK-NEXT: movk x8, #13106, lsl #32
+; CHECK-NEXT: movk x8, #13620, lsl #48
+; CHECK-NEXT: ccmp x10, x8, #0, eq
+; CHECK-NEXT: mov x8, #14134 // =0x3736
+; CHECK-NEXT: movk x8, #14648, lsl #16
+; CHECK-NEXT: movk x8, #12592, lsl #32
+; CHECK-NEXT: movk x8, #13106, lsl #48
+; CHECK-NEXT: ccmp x11, x8, #0, eq
+; CHECK-NEXT: mov x8, #13363 // =0x3433
+; CHECK-NEXT: movk x8, #13877, lsl #16
+; CHECK-NEXT: movk x8, #14391, lsl #32
+; CHECK-NEXT: movk x8, #12345, lsl #48
+; CHECK-NEXT: ccmp x9, x8, #0, eq
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length32(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB59_5
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB59_5
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB59_5
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldr x8, [x0, #24]
+; CHECK-NEXT: ldr x9, [x1, #24]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB59_5
+; CHECK-NEXT: // %bb.4:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB59_5: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind
+ ret i32 %m
+}
+
+
+define i1 @length32_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length32_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldp x12, x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ldp x8, x9, [x0, #16]
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ccmp x8, x12, #0, eq
+; CHECK-NEXT: ccmp x9, x13, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length32_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB61_5
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB61_5
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB61_5
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldr x8, [x0, #24]
+; CHECK-NEXT: ldr x9, [x1, #24]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB61_5
+; CHECK-NEXT: // %bb.4:
+; CHECK-NEXT: lsr w0, wzr, #31
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB61_5: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: lsr w0, w8, #31
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length32_gt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB62_5
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB62_5
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB62_5
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldr x8, [x0, #24]
+; CHECK-NEXT: ldr x9, [x1, #24]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB62_5
+; CHECK-NEXT: // %bb.4:
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: b .LBB62_6
+; CHECK-NEXT: .LBB62_5: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: .LBB62_6: // %endblock
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; CHECK-LABEL: length32_eq_prefer128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldp x12, x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ldp x8, x9, [x0, #16]
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ccmp x8, x12, #0, eq
+; CHECK-NEXT: ccmp x9, x13, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length32_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: length32_eq_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #12592 // =0x3130
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: movk x8, #13106, lsl #16
+; CHECK-NEXT: movk x8, #13620, lsl #32
+; CHECK-NEXT: movk x8, #14134, lsl #48
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: mov x8, #14648 // =0x3938
+; CHECK-NEXT: movk x8, #12592, lsl #16
+; CHECK-NEXT: ldp x9, x11, [x0, #16]
+; CHECK-NEXT: movk x8, #13106, lsl #32
+; CHECK-NEXT: movk x8, #13620, lsl #48
+; CHECK-NEXT: ccmp x10, x8, #0, eq
+; CHECK-NEXT: mov x8, #14134 // =0x3736
+; CHECK-NEXT: movk x8, #14648, lsl #16
+; CHECK-NEXT: movk x8, #12592, lsl #32
+; CHECK-NEXT: movk x8, #13106, lsl #48
+; CHECK-NEXT: ccmp x9, x8, #0, eq
+; CHECK-NEXT: mov x8, #13620 // =0x3534
+; CHECK-NEXT: movk x8, #14134, lsl #16
+; CHECK-NEXT: movk x8, #14648, lsl #32
+; CHECK-NEXT: movk x8, #12592, lsl #48
+; CHECK-NEXT: ccmp x11, x8, #0, eq
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length48(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length48:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB65_7
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB65_7
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB65_7
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldr x8, [x0, #24]
+; CHECK-NEXT: ldr x9, [x1, #24]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB65_7
+; CHECK-NEXT: // %bb.4: // %loadbb4
+; CHECK-NEXT: ldr x8, [x0, #32]
+; CHECK-NEXT: ldr x9, [x1, #32]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB65_7
+; CHECK-NEXT: // %bb.5: // %loadbb5
+; CHECK-NEXT: ldr x8, [x0, #40]
+; CHECK-NEXT: ldr x9, [x1, #40]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB65_7
+; CHECK-NEXT: // %bb.6:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB65_7: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind
+ ret i32 %m
+}
+
+define i1 @length48_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length48_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldp x12, x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ldp x8, x9, [x0, #16]
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ccmp x8, x12, #0, eq
+; CHECK-NEXT: ldp x8, x11, [x0, #32]
+; CHECK-NEXT: ldp x10, x12, [x1, #32]
+; CHECK-NEXT: ccmp x9, x13, #0, eq
+; CHECK-NEXT: ccmp x8, x10, #0, eq
+; CHECK-NEXT: ccmp x11, x12, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length48_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB67_7
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB67_7
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB67_7
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldr x8, [x0, #24]
+; CHECK-NEXT: ldr x9, [x1, #24]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB67_7
+; CHECK-NEXT: // %bb.4: // %loadbb4
+; CHECK-NEXT: ldr x8, [x0, #32]
+; CHECK-NEXT: ldr x9, [x1, #32]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB67_7
+; CHECK-NEXT: // %bb.5: // %loadbb5
+; CHECK-NEXT: ldr x8, [x0, #40]
+; CHECK-NEXT: ldr x9, [x1, #40]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB67_7
+; CHECK-NEXT: // %bb.6:
+; CHECK-NEXT: lsr w0, wzr, #31
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB67_7: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: lsr w0, w8, #31
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length48_gt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB68_7
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB68_7
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB68_7
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldr x8, [x0, #24]
+; CHECK-NEXT: ldr x9, [x1, #24]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB68_7
+; CHECK-NEXT: // %bb.4: // %loadbb4
+; CHECK-NEXT: ldr x8, [x0, #32]
+; CHECK-NEXT: ldr x9, [x1, #32]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB68_7
+; CHECK-NEXT: // %bb.5: // %loadbb5
+; CHECK-NEXT: ldr x8, [x0, #40]
+; CHECK-NEXT: ldr x9, [x1, #40]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB68_7
+; CHECK-NEXT: // %bb.6:
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: b .LBB68_8
+; CHECK-NEXT: .LBB68_7: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: .LBB68_8: // %endblock
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" {
+; CHECK-LABEL: length48_eq_prefer128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldp x12, x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ldp x8, x9, [x0, #16]
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ccmp x8, x12, #0, eq
+; CHECK-NEXT: ldp x8, x11, [x0, #32]
+; CHECK-NEXT: ldp x10, x12, [x1, #32]
+; CHECK-NEXT: ccmp x9, x13, #0, eq
+; CHECK-NEXT: ccmp x8, x10, #0, eq
+; CHECK-NEXT: ccmp x11, x12, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length48_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: length48_eq_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #12592 // =0x3130
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: movk x8, #13106, lsl #16
+; CHECK-NEXT: ldp x11, x12, [x0, #16]
+; CHECK-NEXT: movk x8, #13620, lsl #32
+; CHECK-NEXT: movk x8, #14134, lsl #48
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: mov x9, #14648 // =0x3938
+; CHECK-NEXT: movk x9, #12592, lsl #16
+; CHECK-NEXT: movk x9, #13106, lsl #32
+; CHECK-NEXT: movk x9, #13620, lsl #48
+; CHECK-NEXT: ccmp x10, x9, #0, eq
+; CHECK-NEXT: mov x9, #14134 // =0x3736
+; CHECK-NEXT: movk x9, #14648, lsl #16
+; CHECK-NEXT: movk x9, #12592, lsl #32
+; CHECK-NEXT: movk x9, #13106, lsl #48
+; CHECK-NEXT: ccmp x11, x9, #0, eq
+; CHECK-NEXT: mov x9, #13620 // =0x3534
+; CHECK-NEXT: movk x9, #14134, lsl #16
+; CHECK-NEXT: ldp x10, x11, [x0, #32]
+; CHECK-NEXT: movk x9, #14648, lsl #32
+; CHECK-NEXT: movk x9, #12592, lsl #48
+; CHECK-NEXT: ccmp x12, x9, #0, eq
+; CHECK-NEXT: mov x9, #13106 // =0x3332
+; CHECK-NEXT: movk x9, #13620, lsl #16
+; CHECK-NEXT: movk x9, #14134, lsl #32
+; CHECK-NEXT: movk x9, #14648, lsl #48
+; CHECK-NEXT: ccmp x10, x9, #0, eq
+; CHECK-NEXT: ccmp x11, x8, #0, eq
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length63(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length63:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldr x8, [x0, #24]
+; CHECK-NEXT: ldr x9, [x1, #24]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: // %bb.4: // %loadbb4
+; CHECK-NEXT: ldr x8, [x0, #32]
+; CHECK-NEXT: ldr x9, [x1, #32]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: // %bb.5: // %loadbb5
+; CHECK-NEXT: ldr x8, [x0, #40]
+; CHECK-NEXT: ldr x9, [x1, #40]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: // %bb.6: // %loadbb6
+; CHECK-NEXT: ldr x8, [x0, #48]
+; CHECK-NEXT: ldr x9, [x1, #48]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: // %bb.7: // %loadbb7
+; CHECK-NEXT: ldur x8, [x0, #55]
+; CHECK-NEXT: ldur x9, [x1, #55]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB71_9
+; CHECK-NEXT: // %bb.8:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB71_9: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind
+ ret i32 %m
+}
+
+define i1 @length63_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length63_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldp x12, x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ldp x8, x9, [x0, #16]
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ccmp x8, x12, #0, eq
+; CHECK-NEXT: ldp x8, x11, [x0, #32]
+; CHECK-NEXT: ldp x10, x12, [x1, #32]
+; CHECK-NEXT: ccmp x9, x13, #0, eq
+; CHECK-NEXT: ldr x9, [x0, #48]
+; CHECK-NEXT: ldr x13, [x1, #48]
+; CHECK-NEXT: ccmp x8, x10, #0, eq
+; CHECK-NEXT: ldur x8, [x0, #55]
+; CHECK-NEXT: ldur x10, [x1, #55]
+; CHECK-NEXT: ccmp x11, x12, #0, eq
+; CHECK-NEXT: ccmp x9, x13, #0, eq
+; CHECK-NEXT: ccmp x8, x10, #0, eq
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length63_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldr x8, [x0, #24]
+; CHECK-NEXT: ldr x9, [x1, #24]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: // %bb.4: // %loadbb4
+; CHECK-NEXT: ldr x8, [x0, #32]
+; CHECK-NEXT: ldr x9, [x1, #32]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: // %bb.5: // %loadbb5
+; CHECK-NEXT: ldr x8, [x0, #40]
+; CHECK-NEXT: ldr x9, [x1, #40]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: // %bb.6: // %loadbb6
+; CHECK-NEXT: ldr x8, [x0, #48]
+; CHECK-NEXT: ldr x9, [x1, #48]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: // %bb.7: // %loadbb7
+; CHECK-NEXT: ldur x8, [x0, #55]
+; CHECK-NEXT: ldur x9, [x1, #55]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB73_9
+; CHECK-NEXT: // %bb.8:
+; CHECK-NEXT: lsr w0, wzr, #31
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB73_9: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: lsr w0, w8, #31
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length63_gt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldr x8, [x0, #24]
+; CHECK-NEXT: ldr x9, [x1, #24]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: // %bb.4: // %loadbb4
+; CHECK-NEXT: ldr x8, [x0, #32]
+; CHECK-NEXT: ldr x9, [x1, #32]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: // %bb.5: // %loadbb5
+; CHECK-NEXT: ldr x8, [x0, #40]
+; CHECK-NEXT: ldr x9, [x1, #40]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: // %bb.6: // %loadbb6
+; CHECK-NEXT: ldr x8, [x0, #48]
+; CHECK-NEXT: ldr x9, [x1, #48]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: // %bb.7: // %loadbb7
+; CHECK-NEXT: ldur x8, [x0, #55]
+; CHECK-NEXT: ldur x9, [x1, #55]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB74_9
+; CHECK-NEXT: // %bb.8:
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: b .LBB74_10
+; CHECK-NEXT: .LBB74_9: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: .LBB74_10: // %endblock
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length63_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: length63_eq_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #12592 // =0x3130
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: movk x8, #13106, lsl #16
+; CHECK-NEXT: ldp x11, x12, [x0, #16]
+; CHECK-NEXT: movk x8, #13620, lsl #32
+; CHECK-NEXT: movk x8, #14134, lsl #48
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: mov x9, #14648 // =0x3938
+; CHECK-NEXT: movk x9, #12592, lsl #16
+; CHECK-NEXT: movk x9, #13106, lsl #32
+; CHECK-NEXT: movk x9, #13620, lsl #48
+; CHECK-NEXT: ccmp x10, x9, #0, eq
+; CHECK-NEXT: mov x10, #14134 // =0x3736
+; CHECK-NEXT: movk x10, #14648, lsl #16
+; CHECK-NEXT: movk x10, #12592, lsl #32
+; CHECK-NEXT: movk x10, #13106, lsl #48
+; CHECK-NEXT: ccmp x11, x10, #0, eq
+; CHECK-NEXT: mov x10, #13620 // =0x3534
+; CHECK-NEXT: movk x10, #14134, lsl #16
+; CHECK-NEXT: ldp x11, x13, [x0, #32]
+; CHECK-NEXT: movk x10, #14648, lsl #32
+; CHECK-NEXT: movk x10, #12592, lsl #48
+; CHECK-NEXT: ccmp x12, x10, #0, eq
+; CHECK-NEXT: mov x10, #13106 // =0x3332
+; CHECK-NEXT: ldr x12, [x0, #48]
+; CHECK-NEXT: movk x10, #13620, lsl #16
+; CHECK-NEXT: movk x10, #14134, lsl #32
+; CHECK-NEXT: movk x10, #14648, lsl #48
+; CHECK-NEXT: ccmp x11, x10, #0, eq
+; CHECK-NEXT: ldur x10, [x0, #55]
+; CHECK-NEXT: ccmp x13, x8, #0, eq
+; CHECK-NEXT: mov x8, #13877 // =0x3635
+; CHECK-NEXT: movk x8, #14391, lsl #16
+; CHECK-NEXT: ccmp x12, x9, #0, eq
+; CHECK-NEXT: movk x8, #12345, lsl #32
+; CHECK-NEXT: movk x8, #12849, lsl #48
+; CHECK-NEXT: ccmp x10, x8, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length64(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldr x8, [x0, #24]
+; CHECK-NEXT: ldr x9, [x1, #24]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: // %bb.4: // %loadbb4
+; CHECK-NEXT: ldr x8, [x0, #32]
+; CHECK-NEXT: ldr x9, [x1, #32]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: // %bb.5: // %loadbb5
+; CHECK-NEXT: ldr x8, [x0, #40]
+; CHECK-NEXT: ldr x9, [x1, #40]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: // %bb.6: // %loadbb6
+; CHECK-NEXT: ldr x8, [x0, #48]
+; CHECK-NEXT: ldr x9, [x1, #48]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: // %bb.7: // %loadbb7
+; CHECK-NEXT: ldr x8, [x0, #56]
+; CHECK-NEXT: ldr x9, [x1, #56]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB76_9
+; CHECK-NEXT: // %bb.8:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB76_9: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w0, w8, hs
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length64_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldp x12, x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ldp x8, x9, [x0, #16]
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ccmp x8, x12, #0, eq
+; CHECK-NEXT: ldp x8, x11, [x0, #32]
+; CHECK-NEXT: ldp x10, x12, [x1, #32]
+; CHECK-NEXT: ccmp x9, x13, #0, eq
+; CHECK-NEXT: ldp x9, x13, [x1, #48]
+; CHECK-NEXT: ccmp x8, x10, #0, eq
+; CHECK-NEXT: ldp x8, x10, [x0, #48]
+; CHECK-NEXT: ccmp x11, x12, #0, eq
+; CHECK-NEXT: ccmp x8, x9, #0, eq
+; CHECK-NEXT: ccmp x10, x13, #0, eq
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length64_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldr x8, [x0, #24]
+; CHECK-NEXT: ldr x9, [x1, #24]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: // %bb.4: // %loadbb4
+; CHECK-NEXT: ldr x8, [x0, #32]
+; CHECK-NEXT: ldr x9, [x1, #32]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: // %bb.5: // %loadbb5
+; CHECK-NEXT: ldr x8, [x0, #40]
+; CHECK-NEXT: ldr x9, [x1, #40]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: // %bb.6: // %loadbb6
+; CHECK-NEXT: ldr x8, [x0, #48]
+; CHECK-NEXT: ldr x9, [x1, #48]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: // %bb.7: // %loadbb7
+; CHECK-NEXT: ldr x8, [x0, #56]
+; CHECK-NEXT: ldr x9, [x1, #56]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB78_9
+; CHECK-NEXT: // %bb.8:
+; CHECK-NEXT: lsr w0, wzr, #31
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB78_9: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: lsr w0, w8, #31
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length64_gt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: // %bb.1: // %loadbb1
+; CHECK-NEXT: ldr x8, [x0, #8]
+; CHECK-NEXT: ldr x9, [x1, #8]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: // %bb.2: // %loadbb2
+; CHECK-NEXT: ldr x8, [x0, #16]
+; CHECK-NEXT: ldr x9, [x1, #16]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: // %bb.3: // %loadbb3
+; CHECK-NEXT: ldr x8, [x0, #24]
+; CHECK-NEXT: ldr x9, [x1, #24]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: // %bb.4: // %loadbb4
+; CHECK-NEXT: ldr x8, [x0, #32]
+; CHECK-NEXT: ldr x9, [x1, #32]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: // %bb.5: // %loadbb5
+; CHECK-NEXT: ldr x8, [x0, #40]
+; CHECK-NEXT: ldr x9, [x1, #40]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: // %bb.6: // %loadbb6
+; CHECK-NEXT: ldr x8, [x0, #48]
+; CHECK-NEXT: ldr x9, [x1, #48]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: // %bb.7: // %loadbb7
+; CHECK-NEXT: ldr x8, [x0, #56]
+; CHECK-NEXT: ldr x9, [x1, #56]
+; CHECK-NEXT: rev x8, x8
+; CHECK-NEXT: rev x9, x9
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB79_9
+; CHECK-NEXT: // %bb.8:
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: b .LBB79_10
+; CHECK-NEXT: .LBB79_9: // %res_block
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cneg w8, w8, hs
+; CHECK-NEXT: .LBB79_10: // %endblock
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length64_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: length64_eq_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #12592 // =0x3130
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: movk x8, #13106, lsl #16
+; CHECK-NEXT: ldp x11, x12, [x0, #16]
+; CHECK-NEXT: movk x8, #13620, lsl #32
+; CHECK-NEXT: ldp x13, x14, [x0, #32]
+; CHECK-NEXT: movk x8, #14134, lsl #48
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: mov x9, #14648 // =0x3938
+; CHECK-NEXT: movk x9, #12592, lsl #16
+; CHECK-NEXT: movk x9, #13106, lsl #32
+; CHECK-NEXT: movk x9, #13620, lsl #48
+; CHECK-NEXT: ccmp x10, x9, #0, eq
+; CHECK-NEXT: mov x10, #14134 // =0x3736
+; CHECK-NEXT: movk x10, #14648, lsl #16
+; CHECK-NEXT: movk x10, #12592, lsl #32
+; CHECK-NEXT: movk x10, #13106, lsl #48
+; CHECK-NEXT: ccmp x11, x10, #0, eq
+; CHECK-NEXT: mov x11, #13620 // =0x3534
+; CHECK-NEXT: movk x11, #14134, lsl #16
+; CHECK-NEXT: movk x11, #14648, lsl #32
+; CHECK-NEXT: movk x11, #12592, lsl #48
+; CHECK-NEXT: ccmp x12, x11, #0, eq
+; CHECK-NEXT: mov x11, #13106 // =0x3332
+; CHECK-NEXT: movk x11, #13620, lsl #16
+; CHECK-NEXT: movk x11, #14134, lsl #32
+; CHECK-NEXT: movk x11, #14648, lsl #48
+; CHECK-NEXT: ccmp x13, x11, #0, eq
+; CHECK-NEXT: ldp x11, x12, [x0, #48]
+; CHECK-NEXT: ccmp x14, x8, #0, eq
+; CHECK-NEXT: ccmp x11, x9, #0, eq
+; CHECK-NEXT: ccmp x12, x10, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length96(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length96:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w2, #96 // =0x60
+; CHECK-NEXT: b memcmp
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind
+ ret i32 %m
+}
+
+define i1 @length96_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length96_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #96 // =0x60
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length96_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #96 // =0x60
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: lsr w0, w0, #31
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length96_gt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #96 // =0x60
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length96_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: length96_eq_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: adrp x1, .L.str
+; CHECK-NEXT: add x1, x1, :lo12:.L.str
+; CHECK-NEXT: mov w2, #96 // =0x60
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 96) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length127(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length127:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w2, #127 // =0x7f
+; CHECK-NEXT: b memcmp
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind
+ ret i32 %m
+}
+
+define i1 @length127_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length127_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #127 // =0x7f
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length127_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #127 // =0x7f
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: lsr w0, w0, #31
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length127_gt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #127 // =0x7f
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length127_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: length127_eq_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: adrp x1, .L.str
+; CHECK-NEXT: add x1, x1, :lo12:.L.str
+; CHECK-NEXT: mov w2, #127 // =0x7f
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length128(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w2, #128 // =0x80
+; CHECK-NEXT: b memcmp
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind
+ ret i32 %m
+}
+
+define i1 @length128_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length128_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #128 // =0x80
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length128_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #128 // =0x80
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: lsr w0, w0, #31
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length128_gt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #128 // =0x80
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length128_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: length128_eq_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: adrp x1, .L.str
+; CHECK-NEXT: add x1, x1, :lo12:.L.str
+; CHECK-NEXT: mov w2, #128 // =0x80
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length192(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length192:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w2, #192 // =0xc0
+; CHECK-NEXT: b memcmp
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind
+ ret i32 %m
+}
+
+define i1 @length192_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length192_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #192 // =0xc0
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length192_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #192 // =0xc0
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: lsr w0, w0, #31
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length192_gt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #192 // =0xc0
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length192_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: length192_eq_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: adrp x1, .L.str
+; CHECK-NEXT: add x1, x1, :lo12:.L.str
+; CHECK-NEXT: mov w2, #192 // =0xc0
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length255(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w2, #255 // =0xff
+; CHECK-NEXT: b memcmp
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 255) nounwind
+ ret i32 %m
+}
+
+define i1 @length255_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length255_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #255 // =0xff
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length255_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #255 // =0xff
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: lsr w0, w0, #31
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_gt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length255_gt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #255 // =0xff
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length255_eq_const(ptr %X) nounwind {
+; CHECK-LABEL: length255_eq_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: adrp x1, .L.str
+; CHECK-NEXT: add x1, x1, :lo12:.L.str
+; CHECK-NEXT: mov w2, #255 // =0xff
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length256(ptr %X, ptr %Y) nounwind {
+; CHECK-LABEL: length256:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w2, #256 // =0x100
+; CHECK-NEXT: b memcmp
+ %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 256) nounwind
+ ret i32 %m
+}
+
+define i1 @length256_eq(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length256_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov w2, #256 // =0x100
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
+define i1 @length256_lt(ptr %x, ptr %y) nounwind {
+; CHECK-LABEL: length256_lt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]!
// 8-byte Folded Spill +; CHECK-NEXT: mov w2, #256 // =0x100 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length256_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #256 // =0x100 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length256_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: adrp x1, .L.str +; CHECK-NEXT: add x1, x1, :lo12:.L.str +; CHECK-NEXT: mov w2, #256 // =0x100 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length384(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length384: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w2, #384 // =0x180 +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 384) nounwind + ret i32 %m +} + +define i1 @length384_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length384_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #384 // =0x180 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length384_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #384 // =0x180 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length384_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #384 // =0x180 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length384_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: adrp x1, .L.str +; CHECK-NEXT: add x1, x1, :lo12:.L.str +; CHECK-NEXT: mov w2, #384 // =0x180 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length511(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length511: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w2, #511 // =0x1ff +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind + ret i32 %m +} + +define i1 @length511_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length511_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #511 // =0x1ff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length511_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #511 // =0x1ff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length511_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #511 // =0x1ff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length511_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: adrp x1, .L.str +; CHECK-NEXT: add x1, x1, :lo12:.L.str +; CHECK-NEXT: mov w2, #511 // =0x1ff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length512(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length512: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w2, #512 // =0x200 +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind + ret i32 %m +} + +define i1 @length512_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length512_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #512 // =0x200 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length512_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: mov w2, #512 // =0x200 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length512_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #512 // =0x200 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length512_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: adrp x1, .L.str +; CHECK-NEXT: add x1, x1, :lo12:.L.str +; CHECK-NEXT: mov w2, #512 // =0x200 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @huge_length(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: huge_length: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x2, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind + ret i32 %m +} + +define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: huge_length_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov x2, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind { +; CHECK-LABEL: nonconst_length: +; CHECK: // %bb.0: +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind + ret i32 %m +} + +define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind { +; CHECK-LABEL: nonconst_length_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} diff --git a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll new file mode 100644 index 00000000000000..54f8c7006bb516 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll @@ -0,0 +1,881 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -expandmemcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s + +declare i32 @memcmp(ptr nocapture, ptr nocapture, i64) + +define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp2( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP7]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2) + ret i32 %call +} + +define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonly align 2 %y) { +; CHECK-LABEL: define i32 @cmp2_align2( +; CHECK-SAME: ptr nocapture readonly align 2 [[X:%.*]], ptr nocapture readonly align 2 [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP7]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2) + ret i32 %call +} + +define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp3( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i24, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i24, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i24 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = zext i24 [[TMP2]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: ret i32 [[TMP11]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 3) + ret i32 %call +} + +define i32 @cmp4(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp4( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: ret i32 [[TMP9]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 4) + ret i32 %call +} + +define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp5( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i40, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i40, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i40 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = zext i40 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: ret i32 [[TMP11]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 5) + ret i32 %call +} + +define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp6( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i48, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i48, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i48 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = zext i48 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: ret i32 [[TMP11]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 6) + ret i32 %call +} + +define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp7( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; 
CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 7) + ret i32 %call +} + +define i32 @cmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp8( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: ret i32 [[TMP9]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 8) + ret i32 %call +} + +define i32 @cmp9(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp9( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP5:%.*]], [[TMP6:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 9) + ret i32 %call +} + +define i32 @cmp10(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp10( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] 
= phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP10]]) +; CHECK-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP11]]) +; CHECK-NEXT: [[TMP14]] = zext i16 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP15]] = zext i16 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 10) + ret i32 %call +} + +define i32 @cmp11(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp11( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 11) + ret i32 %call +} + +define i32 @cmp12(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp12( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br 
label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; CHECK-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 12) + ret i32 %call +} + +define i32 @cmp13(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp13( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 13) + ret i32 %call +} + +define i32 @cmp14(ptr nocapture 
readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp14( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 14) + ret i32 %call +} + +define i32 @cmp15(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp15( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr 
%y, i64 15) + ret i32 %call +} + +define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp16( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) + ret i32 %call +} + +define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq2( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq3( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; 
CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 3) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq4( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 4) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq5( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 5) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq6( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 
[[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 6) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture readonly align 4 %y) { +; CHECK-LABEL: define i32 @cmp_eq6_align4( +; CHECK-SAME: ptr nocapture readonly align 4 [[X:%.*]], ptr nocapture readonly align 4 [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 6) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq7( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 7) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq8( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; 
CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 8) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq9( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 9) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq10( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 10) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq11( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 11) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq12( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 12) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq13( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 13) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq14( +; 
CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 14) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq15( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 15) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq16( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], 
label [[RES_BLOCK]], label [[ENDBLOCK]]
+; CHECK:       endblock:
+; CHECK-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}

From 134c91595568ea1335b22e559f20c1a488ea270e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Kokem=C3=BCller?=
Date: Mon, 30 Oct 2023 19:56:03 +0100
Subject: [PATCH 063/144] [libc++] Fix UB in <expected> related to "has value"
 flag (#68552) (#68733)

The calls to std::construct_at might overwrite the previously set
__has_value_ flag in the case where the flag is overlapping with the
actual value or error being stored (since we use [[no_unique_address]]).

To fix this issue, this patch ensures that we initialize the
__has_value_ flag after we call std::construct_at.

Fixes #68552
---
 libcxx/include/__expected/expected.h          | 177 ++++++++----
 .../assign/emplace.intializer_list.pass.cpp   |   8 +
 .../expected.expected/assign/emplace.pass.cpp |   7 +
 .../ctor/ctor.convert.copy.pass.cpp           |  11 +-
 .../ctor/ctor.convert.move.pass.cpp           |  11 +-
 .../expected.expected/ctor/ctor.copy.pass.cpp |  18 +-
 .../ctor/ctor.default.pass.cpp                |   5 +-
 .../ctor/ctor.inplace.pass.cpp                |  18 +-
 .../ctor/ctor.inplace_init_list.pass.cpp      |   9 +-
 .../expected.expected/ctor/ctor.move.pass.cpp |  20 +-
 .../expected.expected/ctor/ctor.u.pass.cpp    |  18 +-
 .../ctor/ctor.unexpect.pass.cpp               |  18 +-
 .../ctor/ctor.unexpect_init_list.pass.cpp     |   9 +-
 .../ctor/ctor.unexpected.copy.pass.cpp        |   8 +-
 .../ctor/ctor.unexpected.move.pass.cpp        |   8 +-
 .../observers/has_value.pass.cpp              |  32 ++++
 .../expected.expected/swap/free.swap.pass.cpp |  64 ++++++-
 .../swap/member.swap.pass.cpp                 |  64 ++++++-
 .../ctor/ctor.convert.copy.pass.cpp           |  11 +-
 .../ctor/ctor.convert.move.pass.cpp           |  11 +-
 .../expected.void/ctor/ctor.copy.pass.cpp     |  11 +-
 .../expected.void/ctor/ctor.move.pass.cpp     |  12 +-
 .../expected.void/ctor/ctor.unexpect.pass.cpp |   6 +-
 .../ctor/ctor.unexpect_init_list.pass.cpp     |   9 +-
 .../ctor/ctor.unexpected.copy.pass.cpp        |   4 +-
 .../ctor/ctor.unexpected.move.pass.cpp        |   4 +-
 .../observers/has_value.pass.cpp              |  12 ++
 .../expected.void/swap/free.swap.pass.cpp     |  32 +++-
 .../expected.void/swap/member.swap.pass.cpp   |  30 ++-
 libcxx/test/std/utilities/expected/types.h    |  49 +++++
 30 files changed, 534 insertions(+), 162 deletions(-)

diff --git a/libcxx/include/__expected/expected.h b/libcxx/include/__expected/expected.h
index 045370a486fae6..bf16c8f720d268 100644
--- a/libcxx/include/__expected/expected.h
+++ b/libcxx/include/__expected/expected.h
@@ -119,9 +119,7 @@ class expected {
 
   _LIBCPP_HIDE_FROM_ABI constexpr expected() noexcept(is_nothrow_default_constructible_v<_Tp>) // strengthened
     requires is_default_constructible_v<_Tp>
-      : __has_val_(true) {
-    std::construct_at(std::addressof(__union_.__val_));
-  }
+      : __union_(std::in_place), __has_val_(true) {}
 
   _LIBCPP_HIDE_FROM_ABI constexpr expected(const expected&) = delete;
 
@@ -136,14 +134,7 @@ class expected {
       noexcept(is_nothrow_copy_constructible_v<_Tp> && is_nothrow_copy_constructible_v<_Err>) // strengthened
     requires(is_copy_constructible_v<_Tp> && is_copy_constructible_v<_Err> &&
              !(is_trivially_copy_constructible_v<_Tp> && is_trivially_copy_constructible_v<_Err>))
-      : __has_val_(__other.__has_val_) {
-    if (__has_val_) {
-      std::construct_at(std::addressof(__union_.__val_), __other.__union_.__val_);
-    } else {
-
std::construct_at(std::addressof(__union_.__unex_), __other.__union_.__unex_); - } - } - + : __union_(__other.__has_val_, __other.__union_), __has_val_(__other.__has_val_) { } _LIBCPP_HIDE_FROM_ABI constexpr expected(expected&&) requires(is_move_constructible_v<_Tp> && is_move_constructible_v<_Err> @@ -154,13 +145,7 @@ class expected { noexcept(is_nothrow_move_constructible_v<_Tp> && is_nothrow_move_constructible_v<_Err>) requires(is_move_constructible_v<_Tp> && is_move_constructible_v<_Err> && !(is_trivially_move_constructible_v<_Tp> && is_trivially_move_constructible_v<_Err>)) - : __has_val_(__other.__has_val_) { - if (__has_val_) { - std::construct_at(std::addressof(__union_.__val_), std::move(__other.__union_.__val_)); - } else { - std::construct_at(std::addressof(__union_.__unex_), std::move(__other.__union_.__unex_)); - } - } + : __union_(__other.__has_val_, std::move(__other.__union_)), __has_val_(__other.__has_val_) { } private: template @@ -200,26 +185,14 @@ class expected { expected(const expected<_Up, _OtherErr>& __other) noexcept(is_nothrow_constructible_v<_Tp, const _Up&> && is_nothrow_constructible_v<_Err, const _OtherErr&>) // strengthened - : __has_val_(__other.__has_val_) { - if (__has_val_) { - std::construct_at(std::addressof(__union_.__val_), __other.__union_.__val_); - } else { - std::construct_at(std::addressof(__union_.__unex_), __other.__union_.__unex_); - } - } + : __union_(__other.__has_val_, __other.__union_), __has_val_(__other.__has_val_) {} template requires __can_convert<_Up, _OtherErr, _Up, _OtherErr>::value _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v<_Up, _Tp> || !is_convertible_v<_OtherErr, _Err>) expected(expected<_Up, _OtherErr>&& __other) noexcept(is_nothrow_constructible_v<_Tp, _Up> && is_nothrow_constructible_v<_Err, _OtherErr>) // strengthened - : __has_val_(__other.__has_val_) { - if (__has_val_) { - std::construct_at(std::addressof(__union_.__val_), std::move(__other.__union_.__val_)); - } else { - std::construct_at(std::addressof(__union_.__unex_), std::move(__other.__union_.__unex_)); - } - } + : __union_(__other.__has_val_, std::move(__other.__union_)), __has_val_(__other.__has_val_) {} template requires(!is_same_v, in_place_t> && !is_same_v> && @@ -227,61 +200,47 @@ class expected { (!is_same_v, bool> || !__is_std_expected>::value)) _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v<_Up, _Tp>) expected(_Up&& __u) noexcept(is_nothrow_constructible_v<_Tp, _Up>) // strengthened - : __has_val_(true) { - std::construct_at(std::addressof(__union_.__val_), std::forward<_Up>(__u)); - } + : __union_(std::in_place, std::forward<_Up>(__u)), __has_val_(true) {} template requires is_constructible_v<_Err, const _OtherErr&> _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v) expected(const unexpected<_OtherErr>& __unex) noexcept(is_nothrow_constructible_v<_Err, const _OtherErr&>) // strengthened - : __has_val_(false) { - std::construct_at(std::addressof(__union_.__unex_), __unex.error()); - } + : __union_(std::unexpect, __unex.error()), __has_val_(false) {} template requires is_constructible_v<_Err, _OtherErr> _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v<_OtherErr, _Err>) expected(unexpected<_OtherErr>&& __unex) noexcept(is_nothrow_constructible_v<_Err, _OtherErr>) // strengthened - : __has_val_(false) { - std::construct_at(std::addressof(__union_.__unex_), std::move(__unex.error())); - } + : __union_(std::unexpect, std::move(__unex.error())), __has_val_(false) {} template requires is_constructible_v<_Tp, _Args...> 
_LIBCPP_HIDE_FROM_ABI constexpr explicit expected(in_place_t, _Args&&... __args) noexcept(is_nothrow_constructible_v<_Tp, _Args...>) // strengthened - : __has_val_(true) { - std::construct_at(std::addressof(__union_.__val_), std::forward<_Args>(__args)...); - } + : __union_(std::in_place, std::forward<_Args>(__args)...), __has_val_(true) {} template requires is_constructible_v< _Tp, initializer_list<_Up>&, _Args... > _LIBCPP_HIDE_FROM_ABI constexpr explicit expected(in_place_t, initializer_list<_Up> __il, _Args&&... __args) noexcept(is_nothrow_constructible_v<_Tp, initializer_list<_Up>&, _Args...>) // strengthened - : __has_val_(true) { - std::construct_at(std::addressof(__union_.__val_), __il, std::forward<_Args>(__args)...); - } + : __union_(std::in_place, __il, std::forward<_Args>(__args)...), __has_val_(true) {} template requires is_constructible_v<_Err, _Args...> _LIBCPP_HIDE_FROM_ABI constexpr explicit expected(unexpect_t, _Args&&... __args) - noexcept(is_nothrow_constructible_v<_Err, _Args...>) // strengthened - : __has_val_(false) { - std::construct_at(std::addressof(__union_.__unex_), std::forward<_Args>(__args)...); - } + noexcept(is_nothrow_constructible_v<_Err, _Args...>) // strengthened + : __union_(std::unexpect, std::forward<_Args>(__args)...), __has_val_(false) {} template requires is_constructible_v< _Err, initializer_list<_Up>&, _Args... > _LIBCPP_HIDE_FROM_ABI constexpr explicit expected(unexpect_t, initializer_list<_Up> __il, _Args&&... __args) noexcept(is_nothrow_constructible_v<_Err, initializer_list<_Up>&, _Args...>) // strengthened - : __has_val_(false) { - std::construct_at(std::addressof(__union_.__unex_), __il, std::forward<_Args>(__args)...); - } + : __union_(std::unexpect, __il, std::forward<_Args>(__args)...), __has_val_(false) {} // [expected.object.dtor], destructor @@ -440,9 +399,10 @@ class expected { std::destroy_at(std::addressof(__union_.__val_)); } else { std::destroy_at(std::addressof(__union_.__unex_)); - __has_val_ = true; } - return *std::construct_at(std::addressof(__union_.__val_), std::forward<_Args>(__args)...); + std::construct_at(std::addressof(__union_.__val_), std::forward<_Args>(__args)...); + __has_val_ = true; + return __union_.__val_; } template @@ -452,9 +412,10 @@ class expected { std::destroy_at(std::addressof(__union_.__val_)); } else { std::destroy_at(std::addressof(__union_.__unex_)); - __has_val_ = true; } - return *std::construct_at(std::addressof(__union_.__val_), __il, std::forward<_Args>(__args)...); + std::construct_at(std::addressof(__union_.__val_), __il, std::forward<_Args>(__args)...); + __has_val_ = true; + return __union_.__val_; } @@ -893,11 +854,15 @@ class expected { } private: - struct __empty_t {}; - template union __union_t { - _LIBCPP_HIDE_FROM_ABI constexpr __union_t() {} + template + _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t(std::in_place_t, _Args&&... __args) + : __val_(std::forward<_Args>(__args)...) {} + + template + _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t(std::unexpect_t, _Args&&... __args) + : __unex_(std::forward<_Args>(__args)...) {} template _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t( @@ -909,6 +874,14 @@ class expected { std::__expected_construct_unexpected_from_invoke_tag, _Func&& __f, _Args&&... 
__args) : __unex_(std::invoke(std::forward<_Func>(__f), std::forward<_Args>(__args)...)) {} + template + _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t(bool __has_val, _Union&& __other) { + if (__has_val) + std::construct_at(std::addressof(__val_), std::forward<_Union>(__other).__val_); + else + std::construct_at(std::addressof(__unex_), std::forward<_Union>(__other).__unex_); + } + _LIBCPP_HIDE_FROM_ABI constexpr ~__union_t() requires(is_trivially_destructible_v<_ValueType> && is_trivially_destructible_v<_ErrorType>) = default; @@ -927,10 +900,17 @@ class expected { template requires(is_trivially_move_constructible_v<_ValueType> && is_trivially_move_constructible_v<_ErrorType>) union __union_t<_ValueType, _ErrorType> { - _LIBCPP_HIDE_FROM_ABI constexpr __union_t() : __empty_() {} _LIBCPP_HIDE_FROM_ABI constexpr __union_t(const __union_t&) = default; _LIBCPP_HIDE_FROM_ABI constexpr __union_t& operator=(const __union_t&) = default; + template + _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t(std::in_place_t, _Args&&... __args) + : __val_(std::forward<_Args>(__args)...) {} + + template + _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t(std::unexpect_t, _Args&&... __args) + : __unex_(std::forward<_Args>(__args)...) {} + template _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t( std::__expected_construct_in_place_from_invoke_tag, _Func&& __f, _Args&&... __args) @@ -941,6 +921,14 @@ class expected { std::__expected_construct_unexpected_from_invoke_tag, _Func&& __f, _Args&&... __args) : __unex_(std::invoke(std::forward<_Func>(__f), std::forward<_Args>(__args)...)) {} + template + _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t(bool __has_val, _Union&& __other) { + if (__has_val) + std::construct_at(std::addressof(__val_), std::forward<_Union>(__other).__val_); + else + std::construct_at(std::addressof(__unex_), std::forward<_Union>(__other).__unex_); + } + _LIBCPP_HIDE_FROM_ABI constexpr ~__union_t() requires(is_trivially_destructible_v<_ValueType> && is_trivially_destructible_v<_ErrorType>) = default; @@ -950,7 +938,6 @@ class expected { requires(!is_trivially_destructible_v<_ValueType> || !is_trivially_destructible_v<_ErrorType>) {} - _LIBCPP_NO_UNIQUE_ADDRESS __empty_t __empty_; _LIBCPP_NO_UNIQUE_ADDRESS _ValueType __val_; _LIBCPP_NO_UNIQUE_ADDRESS _ErrorType __unex_; }; @@ -998,11 +985,7 @@ class expected<_Tp, _Err> { _LIBCPP_HIDE_FROM_ABI constexpr expected(const expected& __rhs) noexcept(is_nothrow_copy_constructible_v<_Err>) // strengthened requires(is_copy_constructible_v<_Err> && !is_trivially_copy_constructible_v<_Err>) - : __has_val_(__rhs.__has_val_) { - if (!__rhs.__has_val_) { - std::construct_at(std::addressof(__union_.__unex_), __rhs.__union_.__unex_); - } - } + : __union_(__rhs.__has_val_, __rhs.__union_), __has_val_(__rhs.__has_val_) {} _LIBCPP_HIDE_FROM_ABI constexpr expected(expected&&) requires(is_move_constructible_v<_Err> && is_trivially_move_constructible_v<_Err>) @@ -1011,51 +994,35 @@ class expected<_Tp, _Err> { _LIBCPP_HIDE_FROM_ABI constexpr expected(expected&& __rhs) noexcept(is_nothrow_move_constructible_v<_Err>) requires(is_move_constructible_v<_Err> && !is_trivially_move_constructible_v<_Err>) - : __has_val_(__rhs.__has_val_) { - if (!__rhs.__has_val_) { - std::construct_at(std::addressof(__union_.__unex_), std::move(__rhs.__union_.__unex_)); - } - } + : __union_(__rhs.__has_val_, std::move(__rhs.__union_)), __has_val_(__rhs.__has_val_) {} template requires __can_convert<_Up, _OtherErr, const _OtherErr&>::value _LIBCPP_HIDE_FROM_ABI constexpr 
explicit(!is_convertible_v) expected(const expected<_Up, _OtherErr>& __rhs) noexcept(is_nothrow_constructible_v<_Err, const _OtherErr&>) // strengthened - : __has_val_(__rhs.__has_val_) { - if (!__rhs.__has_val_) { - std::construct_at(std::addressof(__union_.__unex_), __rhs.__union_.__unex_); - } - } + : __union_(__rhs.__has_val_, __rhs.__union_), __has_val_(__rhs.__has_val_) {} template requires __can_convert<_Up, _OtherErr, _OtherErr>::value _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v<_OtherErr, _Err>) expected(expected<_Up, _OtherErr>&& __rhs) noexcept(is_nothrow_constructible_v<_Err, _OtherErr>) // strengthened - : __has_val_(__rhs.__has_val_) { - if (!__rhs.__has_val_) { - std::construct_at(std::addressof(__union_.__unex_), std::move(__rhs.__union_.__unex_)); - } - } + : __union_(__rhs.__has_val_, std::move(__rhs.__union_)), __has_val_(__rhs.__has_val_) {} template requires is_constructible_v<_Err, const _OtherErr&> _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v) expected(const unexpected<_OtherErr>& __unex) noexcept(is_nothrow_constructible_v<_Err, const _OtherErr&>) // strengthened - : __has_val_(false) { - std::construct_at(std::addressof(__union_.__unex_), __unex.error()); - } + : __union_(std::unexpect, __unex.error()), __has_val_(false) {} template requires is_constructible_v<_Err, _OtherErr> _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v<_OtherErr, _Err>) expected(unexpected<_OtherErr>&& __unex) noexcept(is_nothrow_constructible_v<_Err, _OtherErr>) // strengthened - : __has_val_(false) { - std::construct_at(std::addressof(__union_.__unex_), std::move(__unex.error())); - } + : __union_(std::unexpect, std::move(__unex.error())), __has_val_(false) {} _LIBCPP_HIDE_FROM_ABI constexpr explicit expected(in_place_t) noexcept : __has_val_(true) {} @@ -1063,17 +1030,13 @@ class expected<_Tp, _Err> { requires is_constructible_v<_Err, _Args...> _LIBCPP_HIDE_FROM_ABI constexpr explicit expected(unexpect_t, _Args&&... __args) noexcept(is_nothrow_constructible_v<_Err, _Args...>) // strengthened - : __has_val_(false) { - std::construct_at(std::addressof(__union_.__unex_), std::forward<_Args>(__args)...); - } + : __union_(std::unexpect, std::forward<_Args>(__args)...), __has_val_(false) {} template requires is_constructible_v< _Err, initializer_list<_Up>&, _Args... > _LIBCPP_HIDE_FROM_ABI constexpr explicit expected(unexpect_t, initializer_list<_Up> __il, _Args&&... __args) noexcept(is_nothrow_constructible_v<_Err, initializer_list<_Up>&, _Args...>) // strengthened - : __has_val_(false) { - std::construct_at(std::addressof(__union_.__unex_), __il, std::forward<_Args>(__args)...); - } + : __union_(std::unexpect, __il, std::forward<_Args>(__args)...), __has_val_(false) {} private: template @@ -1507,11 +1470,23 @@ class expected<_Tp, _Err> { union __union_t { _LIBCPP_HIDE_FROM_ABI constexpr __union_t() : __empty_() {} + template + _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t(std::unexpect_t, _Args&&... __args) + : __unex_(std::forward<_Args>(__args)...) {} + template _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t( __expected_construct_unexpected_from_invoke_tag, _Func&& __f, _Args&&... 
__args) : __unex_(std::invoke(std::forward<_Func>(__f), std::forward<_Args>(__args)...)) {} + template + _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t(bool __has_val, _Union&& __other) { + if (__has_val) + std::construct_at(std::addressof(__empty_)); + else + std::construct_at(std::addressof(__unex_), std::forward<_Union>(__other).__unex_); + } + _LIBCPP_HIDE_FROM_ABI constexpr ~__union_t() requires(is_trivially_destructible_v<_ErrorType>) = default; @@ -1534,11 +1509,23 @@ class expected<_Tp, _Err> { _LIBCPP_HIDE_FROM_ABI constexpr __union_t(const __union_t&) = default; _LIBCPP_HIDE_FROM_ABI constexpr __union_t& operator=(const __union_t&) = default; + template + _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t(std::unexpect_t, _Args&&... __args) + : __unex_(std::forward<_Args>(__args)...) {} + template _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t( __expected_construct_unexpected_from_invoke_tag, _Func&& __f, _Args&&... __args) : __unex_(std::invoke(std::forward<_Func>(__f), std::forward<_Args>(__args)...)) {} + template + _LIBCPP_HIDE_FROM_ABI constexpr explicit __union_t(bool __has_val, _Union&& __other) { + if (__has_val) + std::construct_at(std::addressof(__empty_)); + else + std::construct_at(std::addressof(__unex_), std::forward<_Union>(__other).__unex_); + } + _LIBCPP_HIDE_FROM_ABI constexpr ~__union_t() requires(is_trivially_destructible_v<_ErrorType>) = default; diff --git a/libcxx/test/std/utilities/expected/expected.expected/assign/emplace.intializer_list.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/assign/emplace.intializer_list.pass.cpp index 3cdfcde3f4d694..922200a8c0263d 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/assign/emplace.intializer_list.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/assign/emplace.intializer_list.pass.cpp @@ -81,6 +81,14 @@ constexpr bool test() { assert(e.value().i == 10); } + // TailClobberer + { + std::expected, bool> e(std::unexpect); + auto list = {4, 5, 6}; + e.emplace(list); + assert(e.has_value()); + } + return true; } diff --git a/libcxx/test/std/utilities/expected/expected.expected/assign/emplace.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/assign/emplace.pass.cpp index c62e6289350201..491de2dff03312 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/assign/emplace.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/assign/emplace.pass.cpp @@ -73,6 +73,13 @@ constexpr bool test() { assert(e.value() == 10); } + // TailClobberer + { + std::expected, bool> e(std::unexpect); + e.emplace(); + assert(e.has_value()); + } + return true; } diff --git a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.convert.copy.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.convert.copy.pass.cpp index 9274b9a2c030e5..16de28d9703961 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.convert.copy.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.convert.copy.pass.cpp @@ -45,6 +45,7 @@ #include #include "test_macros.h" +#include "../../types.h" // Test Constraints: template @@ -161,13 +162,19 @@ constexpr bool test() { assert(e1.error() == 5); } + // convert TailClobberer + { + const std::expected, char> e1; + std::expected, char> e2 = e1; + assert(e2.has_value()); + assert(e1.has_value()); + } + return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct ThrowingInt { ThrowingInt(int) { throw Except{}; } }; diff 
--git a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.convert.move.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.convert.move.pass.cpp index 71979311bfd103..0e30ea2c7fe0b4 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.convert.move.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.convert.move.pass.cpp @@ -46,6 +46,7 @@ #include "MoveOnly.h" #include "test_macros.h" +#include "../../types.h" // Test Constraints: template @@ -160,13 +161,19 @@ constexpr bool test() { assert(e1.error().get() == 0); } + // convert TailClobberer + { + std::expected, char> e1; + std::expected, char> e2 = std::move(e1); + assert(e2.has_value()); + assert(e1.has_value()); + } + return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct ThrowingInt { ThrowingInt(int) { throw Except{}; } }; diff --git a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.copy.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.copy.pass.cpp index 77d73485025ab6..581df51207da29 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.copy.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.copy.pass.cpp @@ -30,6 +30,7 @@ #include #include "test_macros.h" +#include "../../types.h" struct NonCopyable { NonCopyable(const NonCopyable&) = delete; @@ -93,13 +94,26 @@ constexpr bool test() { assert(!e2.has_value()); assert(e2.error() == 5); } + + // copy TailClobberer as value + { + const std::expected, bool> e1; + auto e2 = e1; + assert(e2.has_value()); + } + + // copy TailClobberer as error + { + const std::expected> e1(std::unexpect); + auto e2 = e1; + assert(!e2.has_value()); + } + return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing() = default; Throwing(const Throwing&) { throw Except{}; } diff --git a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.default.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.default.pass.cpp index 431e604e8b692f..dcd046bdd9d893 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.default.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.default.pass.cpp @@ -22,6 +22,7 @@ #include #include "test_macros.h" +#include "../../types.h" struct NoDedefaultCtor { NoDedefaultCtor() = delete; @@ -45,6 +46,7 @@ constexpr void testDefaultCtor() { template constexpr void testTypes() { + testDefaultCtor(); testDefaultCtor(); testDefaultCtor(); } @@ -52,13 +54,12 @@ constexpr void testTypes() { constexpr bool test() { testTypes(); testTypes(); + testTypes>(); return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing() { throw Except{}; }; }; diff --git a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.inplace.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.inplace.pass.cpp index 92952551711e04..88ec41939439af 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.inplace.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.inplace.pass.cpp @@ -26,6 +26,7 @@ #include "MoveOnly.h" #include "test_macros.h" +#include "../../types.h" // Test Constraints: static_assert(std::is_constructible_v, std::in_place_t>); @@ -54,24 +55,24 @@ struct CopyOnly { friend constexpr bool operator==(const CopyOnly& mi, int ii) { 
return mi.i == ii; } }; -template +template constexpr void testInt() { - std::expected e(std::in_place, 5); + std::expected e(std::in_place, 5); assert(e.has_value()); assert(e.value() == 5); } -template +template constexpr void testLValue() { T t(5); - std::expected e(std::in_place, t); + std::expected e(std::in_place, t); assert(e.has_value()); assert(e.value() == 5); } -template +template constexpr void testRValue() { - std::expected e(std::in_place, T(5)); + std::expected e(std::in_place, T(5)); assert(e.has_value()); assert(e.value() == 5); } @@ -80,10 +81,13 @@ constexpr bool test() { testInt(); testInt(); testInt(); + testInt, bool>(); testLValue(); testLValue(); + testLValue, bool>(); testRValue(); testRValue(); + testRValue, bool>(); // no arg { @@ -111,8 +115,6 @@ constexpr bool test() { void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing(int) { throw Except{}; }; }; diff --git a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.inplace_init_list.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.inplace_init_list.pass.cpp index b4cad54b860e92..a97086fcc9195b 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.inplace_init_list.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.inplace_init_list.pass.cpp @@ -28,6 +28,7 @@ #include "MoveOnly.h" #include "test_macros.h" +#include "../../types.h" // Test Constraints: static_assert( @@ -90,13 +91,17 @@ constexpr bool test() { assert(m.get() == 0); } + // TailClobberer + { + std::expected, bool> e(std::in_place, {1, 2, 3}); + assert(e.has_value()); + } + return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing(std::initializer_list, int) { throw Except{}; }; }; diff --git a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.move.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.move.pass.cpp index 5e6749e50c16c9..cd89e2445860a2 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.move.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.move.pass.cpp @@ -32,6 +32,7 @@ #include #include "test_macros.h" +#include "../../types.h" struct NonMovable { NonMovable(NonMovable&&) = delete; @@ -112,13 +113,28 @@ constexpr bool test() { assert(e2.error() == 5); assert(!e1.has_value()); } + + // move TailClobbererNonTrivialMove as value + { + std::expected, bool> e1; + auto e2 = std::move(e1); + assert(e2.has_value()); + assert(e1.has_value()); + } + + // move TailClobbererNonTrivialMove as error + { + std::expected> e1(std::unexpect); + auto e2 = std::move(e1); + assert(!e2.has_value()); + assert(!e1.has_value()); + } + return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing() = default; Throwing(Throwing&&) { throw Except{}; } diff --git a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.u.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.u.pass.cpp index 9e82943f9f314d..1cf3d9cc2ef49c 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.u.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.u.pass.cpp @@ -29,6 +29,7 @@ #include "MoveOnly.h" #include "test_macros.h" +#include "../../types.h" // Test Constraints: static_assert(std::is_constructible_v, int>); @@ -70,24 +71,24 @@ struct CopyOnly { struct BaseError {}; struct 
DerivedError : BaseError {}; -template +template constexpr void testInt() { - std::expected e(5); + std::expected e(5); assert(e.has_value()); assert(e.value() == 5); } -template +template constexpr void testLValue() { T t(5); - std::expected e(t); + std::expected e(t); assert(e.has_value()); assert(e.value() == 5); } -template +template constexpr void testRValue() { - std::expected e(T(5)); + std::expected e(T(5)); assert(e.has_value()); assert(e.value() == 5); } @@ -96,10 +97,13 @@ constexpr bool test() { testInt(); testInt(); testInt(); + testInt, bool>(); testLValue(); testLValue(); + testLValue, bool>(); testRValue(); testRValue(); + testRValue, bool>(); // Test default template argument. // Without it, the template parameter cannot be deduced from an initializer list @@ -153,8 +157,6 @@ constexpr bool test() { void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing(int) { throw Except{}; }; }; diff --git a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpect.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpect.pass.cpp index 5a78e41dfcae25..27ce97737d288b 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpect.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpect.pass.cpp @@ -26,6 +26,7 @@ #include "MoveOnly.h" #include "test_macros.h" +#include "../../types.h" // Test Constraints: static_assert(std::is_constructible_v, std::unexpect_t>); @@ -54,24 +55,24 @@ struct CopyOnly { friend constexpr bool operator==(const CopyOnly& mi, int ii) { return mi.i == ii; } }; -template +template constexpr void testInt() { - std::expected e(std::unexpect, 5); + std::expected e(std::unexpect, 5); assert(!e.has_value()); assert(e.error() == 5); } -template +template constexpr void testLValue() { T t(5); - std::expected e(std::unexpect, t); + std::expected e(std::unexpect, t); assert(!e.has_value()); assert(e.error() == 5); } -template +template constexpr void testRValue() { - std::expected e(std::unexpect, T(5)); + std::expected e(std::unexpect, T(5)); assert(!e.has_value()); assert(e.error() == 5); } @@ -80,10 +81,13 @@ constexpr bool test() { testInt(); testInt(); testInt(); + testInt, bool>(); testLValue(); testLValue(); + testLValue, bool>(); testRValue(); testRValue(); + testRValue, bool>(); // no arg { @@ -111,8 +115,6 @@ constexpr bool test() { void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing(int) { throw Except{}; }; }; diff --git a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpect_init_list.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpect_init_list.pass.cpp index 7cc36b51e41534..4f5d3d1492d37a 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpect_init_list.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpect_init_list.pass.cpp @@ -28,6 +28,7 @@ #include "MoveOnly.h" #include "test_macros.h" +#include "../../types.h" // Test Constraints: static_assert( @@ -90,13 +91,17 @@ constexpr bool test() { assert(m.get() == 0); } + // TailClobberer + { + std::expected> e(std::unexpect, {1, 2, 3}); + assert(!e.has_value()); + } + return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing(std::initializer_list, int) { throw Except{}; }; }; diff --git 
a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpected.copy.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpected.copy.pass.cpp index 09ac91182b3b89..bbfd3048533c79 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpected.copy.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpected.copy.pass.cpp @@ -27,6 +27,7 @@ #include "MoveOnly.h" #include "test_macros.h" +#include "../../types.h" // Test Constraints static_assert(std::is_constructible_v, const std::unexpected&>); @@ -49,10 +50,10 @@ struct MyInt { friend constexpr bool operator==(const MyInt&, const MyInt&) = default; }; -template +template constexpr void testUnexpected() { const std::unexpected u(5); - std::expected e(u); + std::expected e(u); assert(!e.has_value()); assert(e.error() == 5); } @@ -60,13 +61,12 @@ constexpr void testUnexpected() { constexpr bool test() { testUnexpected(); testUnexpected(); + testUnexpected, bool>(); return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing(int) { throw Except{}; } }; diff --git a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpected.move.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpected.move.pass.cpp index 9aaaa3fe1a448d..800d47bda69587 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpected.move.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/ctor/ctor.unexpected.move.pass.cpp @@ -27,6 +27,7 @@ #include "MoveOnly.h" #include "test_macros.h" +#include "../../types.h" // Test Constraints static_assert(std::is_constructible_v, std::unexpected>); @@ -49,10 +50,10 @@ struct MyInt { friend constexpr bool operator==(const MyInt&, const MyInt&) = default; }; -template +template constexpr void testInt() { std::unexpected u(5); - std::expected e(std::move(u)); + std::expected e(std::move(u)); assert(!e.has_value()); assert(e.error() == 5); } @@ -69,14 +70,13 @@ constexpr bool test() { testInt(); testInt(); testInt(); + testInt, bool>(); testMoveOnly(); return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing(int) { throw Except{}; } }; diff --git a/libcxx/test/std/utilities/expected/expected.expected/observers/has_value.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/observers/has_value.pass.cpp index 27d657a065699e..2b24b0ac24ddb8 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/observers/has_value.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/observers/has_value.pass.cpp @@ -12,10 +12,12 @@ #include #include #include +#include #include #include #include "test_macros.h" +#include "../../types.h" // Test noexcept template @@ -43,6 +45,36 @@ constexpr bool test() { assert(!e.has_value()); } + // The following tests check that the "has_value" flag is not overwritten + // by the constructor of the value. This could happen because the flag is + // stored in the tail padding of the value. + // + // The first test is a simplified version of the real code where this was + // first observed. + // + // The other tests use a synthetic struct that clobbers its tail padding + // on construction, making the issue easier to reproduce. + // + // See https://github.com/llvm/llvm-project/issues/68552 and the linked PR. 
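+  //
+  // A hypothetical layout sketch (illustrative only; the exact placement is
+  // ABI-dependent): for a two-byte type such as `struct S { alignas(2) bool b; };`,
+  // byte 0 holds `b` and byte 1 is tail padding. With [[no_unique_address]],
+  // the library may place the "has value" flag in that padding byte, so a
+  // constructor that memsets the whole object can wipe the flag out.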
+ { + auto f1 = [] -> std::expected, long> { return 0; }; + + auto f2 = [&f1] -> std::expected, int> { + return f1().transform_error([](auto) { return 0; }); + }; + + auto e = f2(); + assert(e.has_value()); + } + { + const std::expected, bool> e = {}; + // clang-cl does not support [[no_unique_address]] yet. +#if !(defined(TEST_COMPILER_CLANG) && defined(_MSC_VER)) + LIBCPP_STATIC_ASSERT(sizeof(TailClobberer<0>) == sizeof(e)); +#endif + assert(e.has_value()); + } + return true; } diff --git a/libcxx/test/std/utilities/expected/expected.expected/swap/free.swap.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/swap/free.swap.pass.cpp index 15c66d2b75076e..3c03efd8329196 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/swap/free.swap.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/swap/free.swap.pass.cpp @@ -129,7 +129,7 @@ constexpr bool test() { std::expected, TrackedMove> e1(std::in_place, 5); std::expected, TrackedMove> e2(std::unexpect, 10); - e1.swap(e2); + swap(e1, e2); assert(!e1.has_value()); assert(e1.error().i == 10); @@ -180,6 +180,35 @@ constexpr bool test() { assert(!e2.error().swapCalled); } + // TailClobberer + { + // is_nothrow_move_constructible_v + { + std::expected, TailClobbererNonTrivialMove<1, true>> x(std::in_place); + std::expected, TailClobbererNonTrivialMove<1, true>> y(std::unexpect); + + swap(x, y); + + // Both of these would fail if adjusting the "has value" flags happened + // _before_ constructing the member objects inside the `swap`. + assert(!x.has_value()); + assert(y.has_value()); + } + + // !is_nothrow_move_constructible_v + { + std::expected, TailClobbererNonTrivialMove<1, false>> x(std::in_place); + std::expected, TailClobbererNonTrivialMove<1, false>> y(std::unexpect); + + swap(x, y); + + // Both of these would fail if adjusting the "has value" flags happened + // _before_ constructing the member objects inside the `swap`. + assert(!x.has_value()); + assert(y.has_value()); + } + } + return true; } @@ -210,6 +239,39 @@ void testException() { assert(*e1 == 5); } } + + // TailClobberer + { + // is_nothrow_move_constructible_v + { + std::expected, TailClobbererNonTrivialMove<1>> x(std::in_place); + std::expected, TailClobbererNonTrivialMove<1>> y(std::unexpect); + try { + swap(x, y); + assert(false); + } catch (Except) { + assert(x.has_value()); + // This would fail if `TailClobbererNonTrivialMove<1>` clobbered the + // flag when rolling back the swap. + assert(!y.has_value()); + } + } + + // !is_nothrow_move_constructible_v + { + std::expected, TailClobbererNonTrivialMove<1, false, true>> x(std::in_place); + std::expected, TailClobbererNonTrivialMove<1, false, true>> y(std::unexpect); + try { + swap(x, y); + assert(false); + } catch (Except) { + // This would fail if `TailClobbererNonTrivialMove<0>` clobbered the + // flag when rolling back the swap. 
+ assert(x.has_value()); + assert(!y.has_value()); + } + } + } #endif // TEST_HAS_NO_EXCEPTIONS } diff --git a/libcxx/test/std/utilities/expected/expected.expected/swap/member.swap.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/swap/member.swap.pass.cpp index d2d4a099220927..b6b112cfbeb8ba 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/swap/member.swap.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/swap/member.swap.pass.cpp @@ -70,7 +70,7 @@ static_assert(!HasMemberSwap); // Test noexcept template -concept MemberSwapNoexcept = +concept MemberSwapNoexcept = // requires(std::expected x, std::expected y) { { x.swap(y) } noexcept; }; @@ -198,6 +198,35 @@ constexpr bool test() { assert(!e2.error().swapCalled); } + // TailClobberer + { + // is_nothrow_move_constructible_v + { + std::expected, TailClobbererNonTrivialMove<1, true>> x(std::in_place); + std::expected, TailClobbererNonTrivialMove<1, true>> y(std::unexpect); + + x.swap(y); + + // Both of these would fail if adjusting the "has value" flags happened + // _before_ constructing the member objects inside the `swap`. + assert(!x.has_value()); + assert(y.has_value()); + } + + // !is_nothrow_move_constructible_v + { + std::expected, TailClobbererNonTrivialMove<1, false>> x(std::in_place); + std::expected, TailClobbererNonTrivialMove<1, false>> y(std::unexpect); + + x.swap(y); + + // Both of these would fail if adjusting the "has value" flags happened + // _before_ constructing the member objects inside the `swap`. + assert(!x.has_value()); + assert(y.has_value()); + } + } + return true; } @@ -228,6 +257,39 @@ void testException() { assert(*e1 == 5); } } + + // TailClobberer + { + // is_nothrow_move_constructible_v + { + std::expected, TailClobbererNonTrivialMove<1>> x(std::in_place); + std::expected, TailClobbererNonTrivialMove<1>> y(std::unexpect); + try { + x.swap(y); + assert(false); + } catch (Except) { + assert(x.has_value()); + // This would fail if `TailClobbererNonTrivialMove<1>` clobbered the + // flag when rolling back the swap. + assert(!y.has_value()); + } + } + + // !is_nothrow_move_constructible_v + { + std::expected, TailClobbererNonTrivialMove<1, false, true>> x(std::in_place); + std::expected, TailClobbererNonTrivialMove<1, false, true>> y(std::unexpect); + try { + x.swap(y); + assert(false); + } catch (Except) { + // This would fail if `TailClobbererNonTrivialMove<0>` clobbered the + // flag when rolling back the swap. 
+ assert(x.has_value()); + assert(!y.has_value()); + } + } + } #endif // TEST_HAS_NO_EXCEPTIONS } diff --git a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.convert.copy.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.convert.copy.pass.cpp index 40f8efa5f94bf9..05f556e25eac14 100644 --- a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.convert.copy.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.convert.copy.pass.cpp @@ -33,6 +33,7 @@ #include #include "test_macros.h" +#include "../../types.h" // Test Constraints: template @@ -97,13 +98,19 @@ constexpr bool test() { assert(e1.error() == 5); } + // convert TailClobberer + { + const std::expected> e1(std::unexpect); + std::expected> e2 = e1; + assert(!e2.has_value()); + assert(!e1.has_value()); + } + return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct ThrowingInt { ThrowingInt(int) { throw Except{}; } }; diff --git a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.convert.move.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.convert.move.pass.cpp index b28fc7a03bb344..a48888be53ee07 100644 --- a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.convert.move.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.convert.move.pass.cpp @@ -34,6 +34,7 @@ #include "MoveOnly.h" #include "test_macros.h" +#include "../../types.h" // Test Constraints: template @@ -98,13 +99,19 @@ constexpr bool test() { assert(e1.error().get() == 0); } + // convert TailClobberer + { + std::expected> e1(std::unexpect); + std::expected> e2 = std::move(e1); + assert(!e2.has_value()); + assert(!e1.has_value()); + } + return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct ThrowingInt { ThrowingInt(int) { throw Except{}; } }; diff --git a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.copy.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.copy.pass.cpp index 689f152a3ac554..7c04a5fa9d0440 100644 --- a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.copy.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.copy.pass.cpp @@ -25,6 +25,7 @@ #include #include "test_macros.h" +#include "../../types.h" struct NonCopyable { NonCopyable(const NonCopyable&) = delete; @@ -62,13 +63,19 @@ constexpr bool test() { assert(!e2.has_value()); assert(e2.error() == 5); } + + // copy TailClobberer as error + { + const std::expected> e1(std::unexpect); + auto e2 = e1; + assert(!e2.has_value()); + } + return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing() = default; Throwing(const Throwing&) { throw Except{}; } diff --git a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.move.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.move.pass.cpp index 61bce2be4897f0..bfb5028c9264d0 100644 --- a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.move.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.move.pass.cpp @@ -25,6 +25,7 @@ #include #include "test_macros.h" +#include "../../types.h" struct NonMovable { NonMovable(NonMovable&&) = delete; @@ -76,13 +77,20 @@ constexpr bool test() { assert(e2.error() == 5); assert(!e1.has_value()); } + + // move TailClobbererNonTrivialMove as error + { + std::expected> e1(std::unexpect); + auto e2 = std::move(e1); + assert(!e2.has_value()); + 
assert(!e1.has_value()); + } + return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing() = default; Throwing(Throwing&&) { throw Except{}; } diff --git a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpect.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpect.pass.cpp index 0a857c77d9c7a6..85bc98d7f462de 100644 --- a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpect.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpect.pass.cpp @@ -26,6 +26,7 @@ #include "MoveOnly.h" #include "test_macros.h" +#include "../../types.h" // Test Constraints: static_assert(std::is_constructible_v, std::unexpect_t>); @@ -80,10 +81,13 @@ constexpr bool test() { testInt(); testInt(); testInt(); + testInt>(); testLValue(); testLValue(); + testLValue>(); testRValue(); testRValue(); + testRValue>(); // no arg { @@ -111,8 +115,6 @@ constexpr bool test() { void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing(int) { throw Except{}; }; }; diff --git a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpect_init_list.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpect_init_list.pass.cpp index a73921225f1fae..4128668a6b07bc 100644 --- a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpect_init_list.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpect_init_list.pass.cpp @@ -28,6 +28,7 @@ #include "MoveOnly.h" #include "test_macros.h" +#include "../../types.h" // Test Constraints: static_assert( @@ -89,13 +90,17 @@ constexpr bool test() { assert(m.get() == 0); } + // TailClobberer + { + std::expected> e(std::unexpect, {1, 2, 3}); + assert(!e.has_value()); + } + return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing(std::initializer_list, int) { throw Except{}; }; }; diff --git a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpected.copy.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpected.copy.pass.cpp index 89e1c9275e3e06..ba738a3e339de7 100644 --- a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpected.copy.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpected.copy.pass.cpp @@ -27,6 +27,7 @@ #include "MoveOnly.h" #include "test_macros.h" +#include "../../types.h" // Test Constraints static_assert(std::is_constructible_v, const std::unexpected&>); @@ -60,13 +61,12 @@ constexpr void testUnexpected() { constexpr bool test() { testUnexpected(); testUnexpected(); + testUnexpected>(); return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing(int) { throw Except{}; } }; diff --git a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpected.move.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpected.move.pass.cpp index 2ddcb63c085f05..33a5e7293df214 100644 --- a/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpected.move.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.void/ctor/ctor.unexpected.move.pass.cpp @@ -27,6 +27,7 @@ #include "MoveOnly.h" #include "test_macros.h" +#include "../../types.h" // Test Constraints static_assert(std::is_constructible_v, std::unexpected>); @@ -69,14 +70,13 @@ constexpr bool test() { testInt(); testInt(); testInt(); + testInt>(); testMoveOnly(); 
return true; } void testException() { #ifndef TEST_HAS_NO_EXCEPTIONS - struct Except {}; - struct Throwing { Throwing(int) { throw Except{}; } }; diff --git a/libcxx/test/std/utilities/expected/expected.void/observers/has_value.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/observers/has_value.pass.cpp index 42a173d60c898c..fe92bb401643d8 100644 --- a/libcxx/test/std/utilities/expected/expected.void/observers/has_value.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.void/observers/has_value.pass.cpp @@ -16,6 +16,7 @@ #include #include "test_macros.h" +#include "../../types.h" // Test noexcept template @@ -43,6 +44,17 @@ constexpr bool test() { assert(!e.has_value()); } + // See comments of the corresponding test in + // "expected.expected/observers/has_value.pass.cpp". + { + const std::expected> e(std::unexpect); + // clang-cl does not support [[no_unique_address]] yet. +#if !(defined(TEST_COMPILER_CLANG) && defined(_MSC_VER)) + LIBCPP_STATIC_ASSERT(sizeof(TailClobberer<1>) == sizeof(e)); +#endif + assert(!e.has_value()); + } + return true; } diff --git a/libcxx/test/std/utilities/expected/expected.void/swap/free.swap.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/swap/free.swap.pass.cpp index ad1eb3f0bd20d0..f7314c79fa6db7 100644 --- a/libcxx/test/std/utilities/expected/expected.void/swap/free.swap.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.void/swap/free.swap.pass.cpp @@ -91,7 +91,7 @@ constexpr bool test() { std::expected e1(std::in_place); std::expected e2(std::unexpect, s, 10); - e1.swap(e2); + swap(e1, e2); assert(!e1.has_value()); assert(e1.error().data_ == 10); @@ -107,7 +107,7 @@ constexpr bool test() { std::expected e1(std::unexpect, s, 10); std::expected e2(std::in_place); - e1.swap(e2); + swap(e1, e2); assert(e1.has_value()); assert(!e2.has_value()); @@ -117,6 +117,19 @@ constexpr bool test() { assert(s.dtorCalled); } + // TailClobberer + { + std::expected> x(std::in_place); + std::expected> y(std::unexpect); + + swap(x, y); + + // The next line would fail if adjusting the "has value" flag happened + // _before_ constructing the member object inside the `swap`. + assert(!x.has_value()); + assert(y.has_value()); + } + return true; } @@ -151,6 +164,21 @@ void testException() { assert(!e2Destroyed); } } + + // TailClobberer + { + std::expected> x(std::in_place); + std::expected> y(std::unexpect); + try { + swap(x, y); + assert(false); + } catch (Except) { + // This would fail if `TailClobbererNonTrivialMove<0, false, true>` + // clobbered the flag before throwing the exception. + assert(x.has_value()); + assert(!y.has_value()); + } + } #endif // TEST_HAS_NO_EXCEPTIONS } diff --git a/libcxx/test/std/utilities/expected/expected.void/swap/member.swap.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/swap/member.swap.pass.cpp index a71c701a469303..70e004abef684a 100644 --- a/libcxx/test/std/utilities/expected/expected.void/swap/member.swap.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.void/swap/member.swap.pass.cpp @@ -52,7 +52,7 @@ struct MoveMayThrow { }; template -concept MemberSwapNoexcept = +concept MemberSwapNoexcept = // requires(std::expected x, std::expected y) { { x.swap(y) } noexcept; }; @@ -126,6 +126,19 @@ constexpr bool test() { assert(s.dtorCalled); } + // TailClobberer + { + std::expected> x(std::in_place); + std::expected> y(std::unexpect); + + x.swap(y); + + // The next line would fail if adjusting the "has value" flag happened + // _before_ constructing the member object inside the `swap`. 
+ assert(!x.has_value()); + assert(y.has_value()); + } + return true; } @@ -160,6 +173,21 @@ void testException() { assert(!e2Destroyed); } } + + // TailClobberer + { + std::expected> x(std::in_place); + std::expected> y(std::unexpect); + try { + x.swap(y); + assert(false); + } catch (Except) { + // This would fail if `TailClobbererNonTrivialMove<0, false, true>` + // clobbered the flag before throwing the exception. + assert(x.has_value()); + assert(!y.has_value()); + } + } #endif // TEST_HAS_NO_EXCEPTIONS } diff --git a/libcxx/test/std/utilities/expected/types.h b/libcxx/test/std/utilities/expected/types.h index 7c7e517785b4f7..ac4a82f2baf78b 100644 --- a/libcxx/test/std/utilities/expected/types.h +++ b/libcxx/test/std/utilities/expected/types.h @@ -9,7 +9,9 @@ #ifndef TEST_STD_UTILITIES_EXPECTED_TYPES_H #define TEST_STD_UTILITIES_EXPECTED_TYPES_H +#include #include +#include #include "test_macros.h" template @@ -150,4 +152,51 @@ struct MoveOnlyErrorType { MoveOnlyErrorType& operator=(const MoveOnlyErrorType&) = delete; }; +// This type has one byte of tail padding where `std::expected` may put its +// "has value" flag. The constructor will clobber all bytes including the +// tail padding. With this type we can check that `std::expected` handles +// the case where the "has value" flag is an overlapping subobject correctly. +// +// See https://github.com/llvm/llvm-project/issues/68552 for details. +template +struct TailClobberer { + constexpr TailClobberer() noexcept { + if (!std::is_constant_evaluated()) { + std::memset(this, Constant, sizeof(*this)); + } + // Always set `b` itself to `false` so that the comparison works. + b = false; + } + constexpr TailClobberer(const TailClobberer&) : TailClobberer() {} + constexpr TailClobberer(TailClobberer&&) = default; + // Converts from `int`/`std::initializer_list, used in some tests. + constexpr TailClobberer(int) : TailClobberer() {} + constexpr TailClobberer(std::initializer_list) noexcept : TailClobberer() {} + + friend constexpr bool operator==(const TailClobberer&, const TailClobberer&) = default; + + friend constexpr void swap(TailClobberer&, TailClobberer&){}; + +private: + alignas(2) bool b; +}; +static_assert(!std::is_trivially_copy_constructible_v>); +static_assert(std::is_trivially_move_constructible_v>); + +template +struct TailClobbererNonTrivialMove : TailClobberer { + using TailClobberer::TailClobberer; + constexpr TailClobbererNonTrivialMove(TailClobbererNonTrivialMove&&) noexcept(Noexcept) : TailClobberer() { +#ifndef TEST_HAS_NO_EXCEPTIONS + if constexpr (!Noexcept && ThrowOnMove) + throw Except{}; +#endif + } +}; +static_assert(!std::is_trivially_copy_constructible_v>); +static_assert(std::is_move_constructible_v>); +static_assert(!std::is_trivially_move_constructible_v>); +static_assert(std::is_nothrow_move_constructible_v>); +static_assert(!std::is_nothrow_move_constructible_v>); + #endif // TEST_STD_UTILITIES_EXPECTED_TYPES_H From d0caa4eef702f6eda1ce5ab3d72faabb55b15ca9 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Mon, 30 Oct 2023 23:06:28 +0400 Subject: [PATCH 064/144] [ADT] Backport std::to_underlying from C++23 (#70681) This patch backports a one-liner `std::to_underlying` that came with C++23. This is useful for refactoring unscoped enums into scoped enums, because the latter are not implicitly convertible to integer types. I followed libc++ implementation, but I consider their testing too heavy for us, so I wrote a simpler set of tests. 
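
As a quick illustration (a hypothetical example, not part of this patch,
where `Kind` is an invented enum), the helper replaces the verbose cast
that scoped enums otherwise require:

    enum class Kind : unsigned { Scalar, Vector };

    // Before: a scoped enum does not implicitly convert to an integer.
    unsigned U1 = static_cast<std::underlying_type_t<Kind>>(Kind::Vector);

    // After: equivalent, via the backported helper.
    unsigned U2 = llvm::to_underlying(Kind::Vector); // U2 == 1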
---
 llvm/include/llvm/ADT/STLForwardCompat.h    |  7 +++++++
 llvm/unittests/ADT/STLForwardCompatTest.cpp | 17 +++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h
index 97d0bff9aaedbd..6afe3610b257fb 100644
--- a/llvm/include/llvm/ADT/STLForwardCompat.h
+++ b/llvm/include/llvm/ADT/STLForwardCompat.h
@@ -60,6 +60,13 @@ auto transformOptional(std::optional<T> &&O, const Function &F)
   return std::nullopt;
 }
 
+/// Returns underlying integer value of an enum. Backport of C++23
+/// std::to_underlying.
+template <typename Enum>
+[[nodiscard]] constexpr std::underlying_type_t<Enum> to_underlying(Enum E) {
+  return static_cast<std::underlying_type_t<Enum>>(E);
+}
+
 } // namespace llvm
 
 #endif // LLVM_ADT_STLFORWARDCOMPAT_H
diff --git a/llvm/unittests/ADT/STLForwardCompatTest.cpp b/llvm/unittests/ADT/STLForwardCompatTest.cpp
index e9cd88cd4c27d8..b0c95d09ba2c6e 100644
--- a/llvm/unittests/ADT/STLForwardCompatTest.cpp
+++ b/llvm/unittests/ADT/STLForwardCompatTest.cpp
@@ -119,4 +119,21 @@ TEST(TransformTest, MoveTransformLlvm) {
   EXPECT_EQ(0u, MoveOnly::Destructions);
 }
 
+TEST(TransformTest, ToUnderlying) {
+  enum E { A1 = 0, B1 = -1 };
+  static_assert(llvm::to_underlying(A1) == 0);
+  static_assert(llvm::to_underlying(B1) == -1);
+
+  enum E2 : unsigned char { A2 = 0, B2 };
+  static_assert(
+      std::is_same_v<unsigned char, decltype(llvm::to_underlying(A2))>);
+  static_assert(llvm::to_underlying(A2) == 0);
+  static_assert(llvm::to_underlying(B2) == 1);
+
+  enum class E3 { A3 = -1, B3 };
+  static_assert(std::is_same_v<int, decltype(llvm::to_underlying(E3::A3))>);
+  static_assert(llvm::to_underlying(E3::A3) == -1);
+  static_assert(llvm::to_underlying(E3::B3) == 0);
+}
+
 } // namespace

From 3f2ed812f021e723212ddb9f808757a7ec3841e1 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Mon, 30 Oct 2023 12:09:43 -0700
Subject: [PATCH 065/144] [InstCombine] Infer nneg on zext when forming from
 non-negative sext (#70706)

Builds on #67982 which recently introduced the nneg flag on a zext
instruction. InstCombine is one of our largest canonicalizers of zext
from non-negative sext instructions, so set the flag there.
---
 .../InstCombine/InstCombineCasts.cpp          |  7 ++--
 .../InstCombine/adjust-for-minmax.ll          | 12 +++----
 .../Transforms/InstCombine/cast-mul-select.ll |  4 +--
 .../Transforms/InstCombine/icmp-ext-ext.ll    |  4 +--
 .../InstCombine/memcpy-from-global.ll         |  8 ++---
 .../InstCombine/minmax-intrinsics.ll          |  4 +--
 .../Transforms/InstCombine/narrow-math.ll     |  8 ++---
 .../Transforms/InstCombine/select_meta.ll     |  2 +-
 llvm/test/Transforms/InstCombine/sext.ll      | 16 +++++-----
 .../Transforms/InstCombine/udiv-simplify.ll   |  4 +--
 llvm/test/Transforms/InstCombine/wcslen-1.ll  |  4 +--
 llvm/test/Transforms/InstCombine/wcslen-3.ll  |  2 +-
 .../sve-interleaved-masked-accesses.ll        | 32 +++++++++----------
 .../Transforms/LoopVectorize/induction.ll     |  6 ++--
 .../PhaseOrdering/gvn-replacement-vs-hoist.ll |  4 +--
 .../test/Transforms/PhaseOrdering/lto-licm.ll |  2 +-
 .../SLPVectorizer/AArch64/getelementptr.ll    |  6 ++--
 .../AMDGPU/uniform-unswitch.ll                |  2 +-
 18 files changed, 65 insertions(+), 62 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 2285a91cbdf2bb..2127000c4b780b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1372,8 +1372,11 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
   unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
   // If the value being extended is zero or positive, use a zext instead.
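   // For example (illustrative note, not in the original source): given a
   // value %x known to be non-negative,
   //   %e = sext i8 %x to i32   is canonicalized to   %e = zext nneg i8 %x to i32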
- if (isKnownNonNegative(Src, DL, 0, &AC, &Sext, &DT)) - return CastInst::Create(Instruction::ZExt, Src, DestTy); + if (isKnownNonNegative(Src, DL, 0, &AC, &Sext, &DT)) { + auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy); + CI->setNonNeg(true); + return CI; + } // Try to extend the entire expression tree to the wide destination type. if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) { diff --git a/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll index 67871f3d64c411..dced5594450537 100644 --- a/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll +++ b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll @@ -246,7 +246,7 @@ define <2 x i32> @umin4_vec(<2 x i32> %n) { define i64 @smax_sext(i32 %a) { ; CHECK-LABEL: @smax_sext( ; CHECK-NEXT: [[NARROW:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 0) -; CHECK-NEXT: [[MAX:%.*]] = zext i32 [[NARROW]] to i64 +; CHECK-NEXT: [[MAX:%.*]] = zext nneg i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[MAX]] ; %a_ext = sext i32 %a to i64 @@ -258,7 +258,7 @@ define i64 @smax_sext(i32 %a) { define <2 x i64> @smax_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @smax_sext_vec( ; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> zeroinitializer) -; CHECK-NEXT: [[MAX:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> +; CHECK-NEXT: [[MAX:%.*]] = zext nneg <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MAX]] ; %a_ext = sext <2 x i32> %a to <2 x i64> @@ -318,7 +318,7 @@ define <2 x i64> @umax_sext_vec(<2 x i32> %a) { define i64 @umin_sext(i32 %a) { ; CHECK-LABEL: @umin_sext( ; CHECK-NEXT: [[NARROW:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 2) -; CHECK-NEXT: [[MIN:%.*]] = zext i32 [[NARROW]] to i64 +; CHECK-NEXT: [[MIN:%.*]] = zext nneg i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[MIN]] ; %a_ext = sext i32 %a to i64 @@ -330,7 +330,7 @@ define i64 @umin_sext(i32 %a) { define <2 x i64> @umin_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_sext_vec( ; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) -; CHECK-NEXT: [[MIN:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> +; CHECK-NEXT: [[MIN:%.*]] = zext nneg <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; %a_ext = sext <2 x i32> %a to <2 x i64> @@ -366,7 +366,7 @@ define <2 x i64> @umax_sext2_vec(<2 x i32> %a) { define i64 @umin_sext2(i32 %a) { ; CHECK-LABEL: @umin_sext2( ; CHECK-NEXT: [[NARROW:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 3) -; CHECK-NEXT: [[MIN:%.*]] = zext i32 [[NARROW]] to i64 +; CHECK-NEXT: [[MIN:%.*]] = zext nneg i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[MIN]] ; %a_ext = sext i32 %a to i64 @@ -378,7 +378,7 @@ define i64 @umin_sext2(i32 %a) { define <2 x i64> @umin_sext2_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_sext2_vec( ; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) -; CHECK-NEXT: [[MIN:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> +; CHECK-NEXT: [[MIN:%.*]] = zext nneg <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; %a_ext = sext <2 x i32> %a to <2 x i64> diff --git a/llvm/test/Transforms/InstCombine/cast-mul-select.ll b/llvm/test/Transforms/InstCombine/cast-mul-select.ll index ab8333beb9e766..23e934de0baeb7 100644 --- a/llvm/test/Transforms/InstCombine/cast-mul-select.ll +++ b/llvm/test/Transforms/InstCombine/cast-mul-select.ll @@ -193,7 +193,7 @@ define void @PR36225(i32 %a, i32 %b, i1 %c1, i3 
%v1, i3 %v2) { ; CHECK-NEXT: ] ; CHECK: for.end: ; CHECK-NEXT: [[H:%.*]] = phi i8 [ [[SPEC_SELECT]], [[FOR_BODY3_US]] ], [ [[SPEC_SELECT]], [[FOR_BODY3_US]] ], [ 0, [[FOR_BODY3]] ], [ 0, [[FOR_BODY3]] ] -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[H]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = zext nneg i8 [[H]] to i32 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], [[A:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT]], label [[EXIT2:%.*]] ; CHECK: exit2: @@ -224,7 +224,7 @@ define void @PR36225(i32 %a, i32 %b, i1 %c1, i3 %v1, i3 %v2) { ; DBGINFO: for.end: ; DBGINFO-NEXT: [[H:%.*]] = phi i8 [ [[SPEC_SELECT]], [[FOR_BODY3_US]] ], [ [[SPEC_SELECT]], [[FOR_BODY3_US]] ], [ 0, [[FOR_BODY3]] ], [ 0, [[FOR_BODY3]] ], !dbg [[DBG100:![0-9]+]] ; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i8 [[H]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG100]] -; DBGINFO-NEXT: [[CONV:%.*]] = zext i8 [[H]] to i32, !dbg [[DBG101:![0-9]+]] +; DBGINFO-NEXT: [[CONV:%.*]] = zext nneg i8 [[H]] to i32, !dbg [[DBG101:![0-9]+]] ; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[CONV]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG101]] ; DBGINFO-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], [[A:%.*]], !dbg [[DBG102:![0-9]+]] ; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]], metadata [[META93:![0-9]+]], metadata !DIExpression()), !dbg [[DBG102]] diff --git a/llvm/test/Transforms/InstCombine/icmp-ext-ext.ll b/llvm/test/Transforms/InstCombine/icmp-ext-ext.ll index b3dafe06a38799..f70e48e2738461 100644 --- a/llvm/test/Transforms/InstCombine/icmp-ext-ext.ll +++ b/llvm/test/Transforms/InstCombine/icmp-ext-ext.ll @@ -289,7 +289,7 @@ define i1 @zext_sext_eq_known_nonneg(i8 %x, i8 %y) { define i1 @zext_sext_sle_known_nonneg_op0_narrow(i8 %x, i16 %y) { ; CHECK-LABEL: @zext_sext_sle_known_nonneg_op0_narrow( ; CHECK-NEXT: [[N:%.*]] = and i8 [[X:%.*]], 12 -; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[N]] to i16 +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i8 [[N]] to i16 ; CHECK-NEXT: [[C:%.*]] = icmp sle i16 [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret i1 [[C]] ; @@ -370,7 +370,7 @@ define <2 x i1> @sext_zext_sge_known_nonneg_op0_narrow(<2 x i5> %x, <2 x i8> %y) define i1 @sext_zext_uge_known_nonneg_op0_wide(i16 %x, i8 %y) { ; CHECK-LABEL: @sext_zext_uge_known_nonneg_op0_wide( ; CHECK-NEXT: [[N:%.*]] = and i8 [[Y:%.*]], 12 -; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[N]] to i16 +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i8 [[N]] to i16 ; CHECK-NEXT: [[C:%.*]] = icmp ule i16 [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret i1 [[C]] ; diff --git a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll index ea9b16e1382ee9..59e756eed3fd77 100644 --- a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll +++ b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll @@ -8,25 +8,25 @@ define float @test1(i32 %hash, float %x, float %y, float %z, float %w) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[HASH:%.*]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP3]], 124 -; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[TMP5]] to i64 ; CHECK-NEXT: [[TMP753:%.*]] = getelementptr [128 x float], ptr @C.0.1248, i64 0, i64 [[TMP0]] ; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP753]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = fmul float [[TMP9]], [[X:%.*]] ; CHECK-NEXT: [[TMP13:%.*]] = fadd float [[TMP11]], 0.000000e+00 ; CHECK-NEXT: [[TMP17_SUM52:%.*]] = or i32 [[TMP5]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 
[[TMP17_SUM52]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TMP17_SUM52]] to i64 ; CHECK-NEXT: [[TMP1851:%.*]] = getelementptr [128 x float], ptr @C.0.1248, i64 0, i64 [[TMP1]] ; CHECK-NEXT: [[TMP19:%.*]] = load float, ptr [[TMP1851]], align 4 ; CHECK-NEXT: [[TMP21:%.*]] = fmul float [[TMP19]], [[Y:%.*]] ; CHECK-NEXT: [[TMP23:%.*]] = fadd float [[TMP21]], [[TMP13]] ; CHECK-NEXT: [[TMP27_SUM50:%.*]] = or i32 [[TMP5]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP27_SUM50]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP27_SUM50]] to i64 ; CHECK-NEXT: [[TMP2849:%.*]] = getelementptr [128 x float], ptr @C.0.1248, i64 0, i64 [[TMP2]] ; CHECK-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP2849]], align 4 ; CHECK-NEXT: [[TMP31:%.*]] = fmul float [[TMP29]], [[Z:%.*]] ; CHECK-NEXT: [[TMP33:%.*]] = fadd float [[TMP31]], [[TMP23]] ; CHECK-NEXT: [[TMP37_SUM48:%.*]] = or i32 [[TMP5]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP37_SUM48]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = zext nneg i32 [[TMP37_SUM48]] to i64 ; CHECK-NEXT: [[TMP3847:%.*]] = getelementptr [128 x float], ptr @C.0.1248, i64 0, i64 [[TMP3]] ; CHECK-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP3847]], align 4 ; CHECK-NEXT: [[TMP41:%.*]] = fmul float [[TMP39]], [[W:%.*]] diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index 7a4da66ae2151c..09003ebacd6ca1 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -217,7 +217,7 @@ define i8 @umin_zext_uses(i5 %x, i5 %y) { define i8 @smax_sext_constant(i5 %x) { ; CHECK-LABEL: @smax_sext_constant( ; CHECK-NEXT: [[TMP1:%.*]] = call i5 @llvm.smax.i5(i5 [[X:%.*]], i5 7) -; CHECK-NEXT: [[M:%.*]] = zext i5 [[TMP1]] to i8 +; CHECK-NEXT: [[M:%.*]] = zext nneg i5 [[TMP1]] to i8 ; CHECK-NEXT: ret i8 [[M]] ; %e = sext i5 %x to i8 @@ -322,7 +322,7 @@ define i8 @umax_zext_constant_big(i5 %x) { define i8 @umin_sext_constant(i5 %x) { ; CHECK-LABEL: @umin_sext_constant( ; CHECK-NEXT: [[TMP1:%.*]] = call i5 @llvm.umin.i5(i5 [[X:%.*]], i5 7) -; CHECK-NEXT: [[M:%.*]] = zext i5 [[TMP1]] to i8 +; CHECK-NEXT: [[M:%.*]] = zext nneg i5 [[TMP1]] to i8 ; CHECK-NEXT: ret i8 [[M]] ; %e = sext i5 %x to i8 diff --git a/llvm/test/Transforms/InstCombine/narrow-math.ll b/llvm/test/Transforms/InstCombine/narrow-math.ll index bfff00f62deac4..6eacb1ca2c018e 100644 --- a/llvm/test/Transforms/InstCombine/narrow-math.ll +++ b/llvm/test/Transforms/InstCombine/narrow-math.ll @@ -141,7 +141,7 @@ define i64 @test2(i32 %V) { ; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range [[RNG0]] ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range [[RNG0]] ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CALL1]], [[CALL2]] -; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[ADD]] to i64 +; CHECK-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[ADD]] to i64 ; CHECK-NEXT: ret i64 [[ZEXT]] ; %call1 = call i32 @callee(), !range !0 @@ -172,7 +172,7 @@ define i64 @test4(i32 %V) { ; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range [[RNG0]] ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range [[RNG0]] ; CHECK-NEXT: [[ADD:%.*]] = mul nuw nsw i32 [[CALL1]], [[CALL2]] -; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[ADD]] to i64 +; CHECK-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[ADD]] to i64 ; CHECK-NEXT: ret i64 [[ZEXT]] ; %call1 = call i32 @callee(), !range !0 @@ -480,7 +480,7 @@ define i64 @test12(i32 %V) { ; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range [[RNG1]] ; CHECK-NEXT: [[CALL2:%.*]] = call i32 
@callee(), !range [[RNG1]] ; CHECK-NEXT: [[NARROW:%.*]] = mul nsw i32 [[CALL1]], [[CALL2]] -; CHECK-NEXT: [[ADD:%.*]] = zext i32 [[NARROW]] to i64 +; CHECK-NEXT: [[ADD:%.*]] = zext nneg i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[ADD]] ; %call1 = call i32 @callee(), !range !1 @@ -614,7 +614,7 @@ define i64 @test18(i32 %V) { define i64 @test19(i32 %V) { ; CHECK-LABEL: @test19( ; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range [[RNG0]] -; CHECK-NEXT: [[SEXT1:%.*]] = zext i32 [[CALL1]] to i64 +; CHECK-NEXT: [[SEXT1:%.*]] = zext nneg i32 [[CALL1]] to i64 ; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i64 -2147481648, [[SEXT1]] ; CHECK-NEXT: ret i64 [[SUB]] ; diff --git a/llvm/test/Transforms/InstCombine/select_meta.ll b/llvm/test/Transforms/InstCombine/select_meta.ll index f788dec108dfb0..df1e5a82ad5d15 100644 --- a/llvm/test/Transforms/InstCombine/select_meta.ll +++ b/llvm/test/Transforms/InstCombine/select_meta.ll @@ -64,7 +64,7 @@ define i32 @foo2(i32, i32) local_unnamed_addr #0 { define i64 @test43(i32 %a) nounwind { ; CHECK-LABEL: @test43( ; CHECK-NEXT: [[NARROW:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 0) -; CHECK-NEXT: [[MAX:%.*]] = zext i32 [[NARROW]] to i64 +; CHECK-NEXT: [[MAX:%.*]] = zext nneg i32 [[NARROW]] to i64 ; CHECK-NEXT: ret i64 [[MAX]] ; %a_ext = sext i32 %a to i64 diff --git a/llvm/test/Transforms/InstCombine/sext.ll b/llvm/test/Transforms/InstCombine/sext.ll index c204b37ff85a58..0e7caff0cfdefa 100644 --- a/llvm/test/Transforms/InstCombine/sext.ll +++ b/llvm/test/Transforms/InstCombine/sext.ll @@ -12,7 +12,7 @@ declare void @use_vec(<2 x i5>) define i64 @test1(i32 %x) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: [[T:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0:![0-9]+]] -; CHECK-NEXT: [[S:%.*]] = zext i32 [[T]] to i64 +; CHECK-NEXT: [[S:%.*]] = zext nneg i32 [[T]] to i64 ; CHECK-NEXT: ret i64 [[S]] ; %t = call i32 @llvm.ctpop.i32(i32 %x) @@ -23,7 +23,7 @@ define i64 @test1(i32 %x) { define i64 @test2(i32 %x) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: [[T:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), !range [[RNG0]] -; CHECK-NEXT: [[S:%.*]] = zext i32 [[T]] to i64 +; CHECK-NEXT: [[S:%.*]] = zext nneg i32 [[T]] to i64 ; CHECK-NEXT: ret i64 [[S]] ; %t = call i32 @llvm.ctlz.i32(i32 %x, i1 true) @@ -34,7 +34,7 @@ define i64 @test2(i32 %x) { define i64 @test3(i32 %x) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: [[T:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range [[RNG0]] -; CHECK-NEXT: [[S:%.*]] = zext i32 [[T]] to i64 +; CHECK-NEXT: [[S:%.*]] = zext nneg i32 [[T]] to i64 ; CHECK-NEXT: ret i64 [[S]] ; %t = call i32 @llvm.cttz.i32(i32 %x, i1 true) @@ -45,7 +45,7 @@ define i64 @test3(i32 %x) { define i64 @test4(i32 %x) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: [[T:%.*]] = udiv i32 [[X:%.*]], 3 -; CHECK-NEXT: [[S:%.*]] = zext i32 [[T]] to i64 +; CHECK-NEXT: [[S:%.*]] = zext nneg i32 [[T]] to i64 ; CHECK-NEXT: ret i64 [[S]] ; %t = udiv i32 %x, 3 @@ -56,7 +56,7 @@ define i64 @test4(i32 %x) { define i64 @test5(i32 %x) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: [[T:%.*]] = urem i32 [[X:%.*]], 30000 -; CHECK-NEXT: [[S:%.*]] = zext i32 [[T]] to i64 +; CHECK-NEXT: [[S:%.*]] = zext nneg i32 [[T]] to i64 ; CHECK-NEXT: ret i64 [[S]] ; %t = urem i32 %x, 30000 @@ -68,7 +68,7 @@ define i64 @test6(i32 %x) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: [[U:%.*]] = lshr i32 [[X:%.*]], 3 ; CHECK-NEXT: [[T:%.*]] = mul nuw nsw i32 [[U]], 3 -; CHECK-NEXT: [[S:%.*]] = zext i32 [[T]] to i64 +; CHECK-NEXT: [[S:%.*]] = zext nneg i32 [[T]] to i64 ; CHECK-NEXT: ret i64 [[S]] ; %u = 
lshr i32 %x, 3 @@ -81,7 +81,7 @@ define i64 @test7(i32 %x) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[T:%.*]] = and i32 [[X:%.*]], 511 ; CHECK-NEXT: [[U:%.*]] = sub nuw nsw i32 20000, [[T]] -; CHECK-NEXT: [[S:%.*]] = zext i32 [[U]] to i64 +; CHECK-NEXT: [[S:%.*]] = zext nneg i32 [[U]] to i64 ; CHECK-NEXT: ret i64 [[S]] ; %t = and i32 %x, 511 @@ -296,7 +296,7 @@ define i32 @test17(i1 %x) { define i32 @test18(i16 %x) { ; CHECK-LABEL: @test18( ; CHECK-NEXT: [[SEL:%.*]] = call i16 @llvm.smax.i16(i16 [[X:%.*]], i16 0) -; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[SEL]] to i32 +; CHECK-NEXT: [[EXT:%.*]] = zext nneg i16 [[SEL]] to i32 ; CHECK-NEXT: ret i32 [[EXT]] ; %cmp = icmp slt i16 %x, 0 diff --git a/llvm/test/Transforms/InstCombine/udiv-simplify.ll b/llvm/test/Transforms/InstCombine/udiv-simplify.ll index 724170e376b35a..a38d32d7925500 100644 --- a/llvm/test/Transforms/InstCombine/udiv-simplify.ll +++ b/llvm/test/Transforms/InstCombine/udiv-simplify.ll @@ -27,7 +27,7 @@ define i64 @test1_PR2274(i32 %x, i32 %g) nounwind { ; CHECK-LABEL: @test1_PR2274( ; CHECK-NEXT: [[Y:%.*]] = lshr i32 [[X:%.*]], 30 ; CHECK-NEXT: [[R:%.*]] = udiv i32 [[Y]], [[G:%.*]] -; CHECK-NEXT: [[Z:%.*]] = zext i32 [[R]] to i64 +; CHECK-NEXT: [[Z:%.*]] = zext nneg i32 [[R]] to i64 ; CHECK-NEXT: ret i64 [[Z]] ; %y = lshr i32 %x, 30 @@ -39,7 +39,7 @@ define i64 @test2_PR2274(i32 %x, i32 %v) nounwind { ; CHECK-LABEL: @test2_PR2274( ; CHECK-NEXT: [[Y:%.*]] = lshr i32 [[X:%.*]], 31 ; CHECK-NEXT: [[R:%.*]] = udiv i32 [[Y]], [[V:%.*]] -; CHECK-NEXT: [[Z:%.*]] = zext i32 [[R]] to i64 +; CHECK-NEXT: [[Z:%.*]] = zext nneg i32 [[R]] to i64 ; CHECK-NEXT: ret i64 [[Z]] ; %y = lshr i32 %x, 31 diff --git a/llvm/test/Transforms/InstCombine/wcslen-1.ll b/llvm/test/Transforms/InstCombine/wcslen-1.ll index 5d05cff6e54b84..4a9a4b92632027 100644 --- a/llvm/test/Transforms/InstCombine/wcslen-1.ll +++ b/llvm/test/Transforms/InstCombine/wcslen-1.ll @@ -175,7 +175,7 @@ define i64 @test_no_simplify2_no_null_opt(i32 %x) #0 { define i64 @test_no_simplify3(i32 %x) { ; CHECK-LABEL: @test_no_simplify3( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 15 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[AND]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[AND]] to i64 ; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [13 x i32], ptr @null_hello_mid, i64 0, i64 [[TMP1]] ; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]]) ; CHECK-NEXT: ret i64 [[HELLO_L]] @@ -189,7 +189,7 @@ define i64 @test_no_simplify3(i32 %x) { define i64 @test_no_simplify3_no_null_opt(i32 %x) #0 { ; CHECK-LABEL: @test_no_simplify3_no_null_opt( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 15 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[AND]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[AND]] to i64 ; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [13 x i32], ptr @null_hello_mid, i64 0, i64 [[TMP1]] ; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(ptr [[HELLO_P]]) ; CHECK-NEXT: ret i64 [[HELLO_L]] diff --git a/llvm/test/Transforms/InstCombine/wcslen-3.ll b/llvm/test/Transforms/InstCombine/wcslen-3.ll index c463b6b1e9526e..6dc9534c4986e9 100644 --- a/llvm/test/Transforms/InstCombine/wcslen-3.ll +++ b/llvm/test/Transforms/InstCombine/wcslen-3.ll @@ -164,7 +164,7 @@ define i64 @test_no_simplify2(i16 %x) { define i64 @test_no_simplify3(i16 %x) { ; CHECK-LABEL: @test_no_simplify3( ; CHECK-NEXT: [[AND:%.*]] = and i16 [[X:%.*]], 15 -; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[AND]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i16 [[AND]] to i64 ; CHECK-NEXT: 
[[HELLO_P:%.*]] = getelementptr inbounds [13 x i16], ptr @null_hello_mid, i64 0, i64 [[TMP1]] ; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]]) ; CHECK-NEXT: ret i64 [[HELLO_L]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index 83523545f84d0a..4803b96642afd5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -82,19 +82,19 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; SCALAR_TAIL_FOLDING: if.then: ; SCALAR_TAIL_FOLDING-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_024]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP22:%.*]] = zext i32 [[MUL]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP22:%.*]] = zext nneg i32 [[MUL]] to i64 ; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP22]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: [[ADD:%.*]] = or i32 [[MUL]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP24:%.*]] = zext i32 [[ADD]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP24:%.*]] = zext nneg i32 [[ADD]] to i64 ; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP24]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP25:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: [[SPEC_SELECT_I:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP23]], i8 [[TMP25]]) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP26:%.*]] = zext i32 [[MUL]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP26:%.*]] = zext nneg i32 [[MUL]] to i64 ; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP26]] ; SCALAR_TAIL_FOLDING-NEXT: store i8 [[SPEC_SELECT_I]], ptr [[ARRAYIDX6]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: [[SUB:%.*]] = sub i8 0, [[SPEC_SELECT_I]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP27:%.*]] = zext i32 [[ADD]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP27:%.*]] = zext nneg i32 [[ADD]] to i64 ; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP27]] ; SCALAR_TAIL_FOLDING-NEXT: store i8 [[SUB]], ptr [[ARRAYIDX11]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_INC]] @@ -239,12 +239,12 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = zext [[TMP7]] to +; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = zext nneg [[TMP7]] to ; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP8]] ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), [[TMP9]], i32 1, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = or [[TMP7]], 
shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = zext [[TMP11]] to +; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = zext nneg [[TMP11]] to ; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP12]] ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP13]], i32 1, [[TMP10]]) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() @@ -262,14 +262,14 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING: for.body: ; SCALAR_TAIL_FOLDING-NEXT: [[IX_012:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] ; SCALAR_TAIL_FOLDING-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_012]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = zext i32 [[MUL]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = zext nneg i32 [[MUL]] to i64 ; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP17]] ; SCALAR_TAIL_FOLDING-NEXT: store i8 1, ptr [[ARRAYIDX]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_012]], [[CONV]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; SCALAR_TAIL_FOLDING: if.then: ; SCALAR_TAIL_FOLDING-NEXT: [[ADD:%.*]] = or i32 [[MUL]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = zext i32 [[ADD]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = zext nneg i32 [[ADD]] to i64 ; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP18]] ; SCALAR_TAIL_FOLDING-NEXT: store i8 2, ptr [[ARRAYIDX3]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_INC]] @@ -303,12 +303,12 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = zext [[TMP6]] to +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = zext nneg [[TMP6]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP7]] ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), [[TMP8]], i32 1, [[ACTIVE_LANE_MASK]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = or [[TMP6]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = zext [[TMP10]] to +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = zext nneg [[TMP10]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP11]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP12]], i32 1, [[TMP13]]) @@ -404,12 
+404,12 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = zext [[TMP7]] to +; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = zext nneg [[TMP7]] to ; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP9]] ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), [[TMP10]], i32 1, [[TMP8]]) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = or [[TMP7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = zext [[TMP12]] to +; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = zext nneg [[TMP12]] to ; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP13]] ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP14]], i32 1, [[TMP11]]) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() @@ -430,7 +430,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_018]], [[CONV]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; SCALAR_TAIL_FOLDING: if.then: -; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = zext i32 [[MUL]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = zext nneg i32 [[MUL]] to i64 ; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP18]] ; SCALAR_TAIL_FOLDING-NEXT: store i8 1, ptr [[ARRAYIDX]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: br label [[IF_END]] @@ -439,7 +439,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP4]], label [[IF_THEN6:%.*]], label [[FOR_INC]] ; SCALAR_TAIL_FOLDING: if.then6: ; SCALAR_TAIL_FOLDING-NEXT: [[ADD:%.*]] = or i32 [[MUL]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = zext i32 [[ADD]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = zext nneg i32 [[ADD]] to i64 ; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP19]] ; SCALAR_TAIL_FOLDING-NEXT: store i8 2, ptr [[ARRAYIDX7]], align 1 ; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_INC]] @@ -477,13 +477,13 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = zext [[TMP6]] to +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = zext nneg [[TMP6]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP8]] ; 
PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP7]], zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), [[TMP9]], i32 1, [[TMP10]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = or [[TMP6]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = zext [[TMP12]] to +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = zext nneg [[TMP12]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP13]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP14]], i32 1, [[TMP15]]) diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 90ad054c5a22e5..2df55bdf89a00f 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -2083,7 +2083,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; IND: for.body: ; IND-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ] ; IND-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ] -; IND-NEXT: [[TMP16:%.*]] = zext i32 [[I]] to i64 +; IND-NEXT: [[TMP16:%.*]] = zext nneg i32 [[I]] to i64 ; IND-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP16]] ; IND-NEXT: [[VAR1:%.*]] = load i32, ptr [[VAR0]], align 4 ; IND-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]] @@ -2173,7 +2173,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL: for.body: ; UNROLL-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ] ; UNROLL-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ] -; UNROLL-NEXT: [[TMP26:%.*]] = zext i32 [[I]] to i64 +; UNROLL-NEXT: [[TMP26:%.*]] = zext nneg i32 [[I]] to i64 ; UNROLL-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP26]] ; UNROLL-NEXT: [[VAR1:%.*]] = load i32, ptr [[VAR0]], align 4 ; UNROLL-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]] @@ -2397,7 +2397,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE: for.body: ; INTERLEAVE-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ] ; INTERLEAVE-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ] -; INTERLEAVE-NEXT: [[TMP46:%.*]] = zext i32 [[I]] to i64 +; INTERLEAVE-NEXT: [[TMP46:%.*]] = zext nneg i32 [[I]] to i64 ; INTERLEAVE-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP46]] ; INTERLEAVE-NEXT: [[VAR1:%.*]] = load i32, ptr [[VAR0]], align 4 ; INTERLEAVE-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]] diff --git a/llvm/test/Transforms/PhaseOrdering/gvn-replacement-vs-hoist.ll b/llvm/test/Transforms/PhaseOrdering/gvn-replacement-vs-hoist.ll index ea863f7355ad9f..522ebf9dcc04bc 100644 --- a/llvm/test/Transforms/PhaseOrdering/gvn-replacement-vs-hoist.ll +++ 
b/llvm/test/Transforms/PhaseOrdering/gvn-replacement-vs-hoist.ll @@ -6,7 +6,7 @@ define void @test(ptr noundef %a, i32 noundef %beam) { ; CHECK-SAME: (ptr nocapture noundef writeonly [[A:%.*]], i32 noundef [[BEAM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[BEAM]], 1 -; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[MUL]] to i64 +; CHECK-NEXT: [[IDXPROM:%.*]] = zext nneg i32 [[MUL]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: @@ -20,7 +20,7 @@ define void @test(ptr noundef %a, i32 noundef %beam) { ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: if.else: ; CHECK-NEXT: [[MUL2:%.*]] = shl nuw nsw i32 [[I_06]], 1 -; CHECK-NEXT: [[IDXPROM3:%.*]] = zext i32 [[MUL2]] to i64 +; CHECK-NEXT: [[IDXPROM3:%.*]] = zext nneg i32 [[MUL2]] to i64 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM3]] ; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX4]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] diff --git a/llvm/test/Transforms/PhaseOrdering/lto-licm.ll b/llvm/test/Transforms/PhaseOrdering/lto-licm.ll index 1a5a67d8241a1b..763e266e6a3829 100644 --- a/llvm/test/Transforms/PhaseOrdering/lto-licm.ll +++ b/llvm/test/Transforms/PhaseOrdering/lto-licm.ll @@ -11,7 +11,7 @@ define void @hoist_fdiv(ptr %a, float %b) { ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], 1024 ; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_0]] to i64 +; CHECK-NEXT: [[IDXPROM:%.*]] = zext nneg i32 [[I_0]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IDXPROM]] ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP0]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll index 9a55e1eee5bd40..63934a2cc96461 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll @@ -67,7 +67,7 @@ define i32 @getelementptr_4x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = zext nneg i32 [[TMP6]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[T6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[SUM_032]] @@ -159,12 +159,12 @@ define i32 @getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[SUM_032:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD16]], [[FOR_BODY]] ] ; CHECK-NEXT: [[T4:%.*]] = shl nuw nsw i32 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[T4]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[T4]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[T6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: 
[[ADD1:%.*]] = add nsw i32 [[T6]], [[SUM_032]] ; CHECK-NEXT: [[T7:%.*]] = or i32 [[T4]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[T7]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = zext nneg i32 [[T7]] to i64 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP3]] ; CHECK-NEXT: [[T8:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]] diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll index cbbf4d6e7be195..2069efd12d27af 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll @@ -34,7 +34,7 @@ define amdgpu_kernel void @uniform_unswitch(ptr nocapture %out, i32 %n, i32 %x) ; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[I_07]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[I_07]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_GLOBAL]], i64 [[TMP0]] ; CHECK-NEXT: store i32 [[I_07]], ptr addrspace(1) [[ARRAYIDX]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] From 4aa12afb967bd7c5f051f3b72271f787f1a7538b Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 30 Oct 2023 12:23:51 -0700 Subject: [PATCH 066/144] [Github] Fetch all commits in PR for code formatting checks (#69766) This patch makes a couple changes to the PR code formatting check: - Moves the `changed-files` action to before the checkout to make sure that it pulls information from the Github API rather than by running `git diff` to alleviate some performance problems. - Checkout the head of the pull request head instead of the base of the pull request to ensure that we have the PR commits inside the checkout. - Add an additional sparse checkout of the necessary LLVM tools to run the action to alleviate security problems introduced by checking out the head of the pull request. Only code from the base of the pull request runs. - Adjust the commit references to be based on `HEAD` as Github doesn't give exact commit SHAs for the first commit in the PR. --- .github/workflows/pr-code-format.yml | 46 ++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml index 3a91ffb0b1ad9a..c021c14f4a4953 100644 --- a/.github/workflows/pr-code-format.yml +++ b/.github/workflows/pr-code-format.yml @@ -7,17 +7,37 @@ jobs: code_formatter: runs-on: ubuntu-latest steps: - - name: Fetch LLVM sources - uses: actions/checkout@v4 - with: - fetch-depth: 2 - + # Get changed files before checking out the repository to force the action + # to analyze the diff from the Github API rather than looking at the + # shallow clone and erroring out, which is significantly more prone to + # failure. 
- name: Get changed files id: changed-files uses: tj-actions/changed-files@v39 with: separator: "," - fetch_depth: 100 # Fetches only the last 10 commits + + - name: Calculate number of commits to fetch + run: echo "PR_FETCH_DEPTH=$(( ${{ github.event.pull_request.commits }} + 1 ))" >> "${GITHUB_ENV}" + + - name: Fetch PR sources + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.ref }} + fetch-depth: ${{ env.PR_FETCH_DEPTH }} + path: pr-sources + + # We need to make sure that we aren't executing/using any code from the + # PR for security reasons as we're using pull_request_target. Checkout + # the target branch with the necessary files. + - name: Fetch LLVM Sources + uses: actions/checkout@v4 + with: + sparse-checkout: | + llvm/utils/git/requirements_formatting.txt + llvm/utils/git/code-format-helper.py + sparse-checkout-cone-mode: false + path: llvm-sources - name: "Listed files" run: | @@ -34,21 +54,21 @@ jobs: with: python-version: '3.11' cache: 'pip' - cache-dependency-path: 'llvm/utils/git/requirements_formatting.txt' + cache-dependency-path: 'llvm-sources/llvm/utils/git/requirements_formatting.txt' - name: Install python dependencies - run: pip install -r llvm/utils/git/requirements_formatting.txt + run: pip install -r llvm-sources/llvm/utils/git/requirements_formatting.txt - name: Run code formatter env: GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }} - START_REV: ${{ github.event.pull_request.base.sha }} - END_REV: ${{ github.event.pull_request.head.sha }} + PR_DEPTH: ${{ github.event.pull_request.commits }} CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} + working-directory: ./pr-sources run: | - python llvm/utils/git/code-format-helper.py \ + python ../llvm-sources/llvm/utils/git/code-format-helper.py \ --token ${{ secrets.GITHUB_TOKEN }} \ --issue-number $GITHUB_PR_NUMBER \ - --start-rev $START_REV \ - --end-rev $END_REV \ + --start-rev HEAD~$PR_DEPTH \ + --end-rev HEAD \ --changed-files "$CHANGED_FILES" From 96410a6b1403de3a90fa76bc68e97807be969e97 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 30 Oct 2023 12:33:35 -0700 Subject: [PATCH 067/144] Revert "[Github] Fetch all commits in PR for code formatting checks (#69766)" This reverts commit 4aa12afb967bd7c5f051f3b72271f787f1a7538b. This change introduced failures upon checking out the PR source code. Pulling this out of tree while I investigate further. --- .github/workflows/pr-code-format.yml | 46 ++++++++-------------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml index c021c14f4a4953..3a91ffb0b1ad9a 100644 --- a/.github/workflows/pr-code-format.yml +++ b/.github/workflows/pr-code-format.yml @@ -7,37 +7,17 @@ jobs: code_formatter: runs-on: ubuntu-latest steps: - # Get changed files before checking out the repository to force the action - # to analyze the diff from the Github API rather than looking at the - # shallow clone and erroring out, which is significantly more prone to - # failure. 
+ - name: Fetch LLVM sources + uses: actions/checkout@v4 + with: + fetch-depth: 2 + - name: Get changed files id: changed-files uses: tj-actions/changed-files@v39 with: separator: "," - - - name: Calculate number of commits to fetch - run: echo "PR_FETCH_DEPTH=$(( ${{ github.event.pull_request.commits }} + 1 ))" >> "${GITHUB_ENV}" - - - name: Fetch PR sources - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.ref }} - fetch-depth: ${{ env.PR_FETCH_DEPTH }} - path: pr-sources - - # We need to make sure that we aren't executing/using any code from the - # PR for security reasons as we're using pull_request_target. Checkout - # the target branch with the necessary files. - - name: Fetch LLVM Sources - uses: actions/checkout@v4 - with: - sparse-checkout: | - llvm/utils/git/requirements_formatting.txt - llvm/utils/git/code-format-helper.py - sparse-checkout-cone-mode: false - path: llvm-sources + fetch_depth: 100 # Fetches only the last 10 commits - name: "Listed files" run: | @@ -54,21 +34,21 @@ jobs: with: python-version: '3.11' cache: 'pip' - cache-dependency-path: 'llvm-sources/llvm/utils/git/requirements_formatting.txt' + cache-dependency-path: 'llvm/utils/git/requirements_formatting.txt' - name: Install python dependencies - run: pip install -r llvm-sources/llvm/utils/git/requirements_formatting.txt + run: pip install -r llvm/utils/git/requirements_formatting.txt - name: Run code formatter env: GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }} - PR_DEPTH: ${{ github.event.pull_request.commits }} + START_REV: ${{ github.event.pull_request.base.sha }} + END_REV: ${{ github.event.pull_request.head.sha }} CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} - working-directory: ./pr-sources run: | - python ../llvm-sources/llvm/utils/git/code-format-helper.py \ + python llvm/utils/git/code-format-helper.py \ --token ${{ secrets.GITHUB_TOKEN }} \ --issue-number $GITHUB_PR_NUMBER \ - --start-rev HEAD~$PR_DEPTH \ - --end-rev HEAD \ + --start-rev $START_REV \ + --end-rev $END_REV \ --changed-files "$CHANGED_FILES" From 7c2ef38c36eda2907cd6a3efff88bb86a1b381a3 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Mon, 30 Oct 2023 22:31:04 +0300 Subject: [PATCH 068/144] [mlir][NFC] Use `llvm::to_underlying` in sparse tensor IR detail --- mlir/lib/Dialect/SparseTensor/IR/Detail/DimLvlMap.h | 3 ++- .../Dialect/SparseTensor/IR/Detail/TemplateExtras.h | 7 ------- mlir/lib/Dialect/SparseTensor/IR/Detail/Var.h | 13 +++++++------ 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/IR/Detail/DimLvlMap.h b/mlir/lib/Dialect/SparseTensor/IR/Detail/DimLvlMap.h index 664b49509f070f..b3200d0983eb79 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/Detail/DimLvlMap.h +++ b/mlir/lib/Dialect/SparseTensor/IR/Detail/DimLvlMap.h @@ -12,6 +12,7 @@ #include "Var.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" +#include "llvm/ADT/STLForwardCompat.h" namespace mlir { namespace sparse_tensor { @@ -22,7 +23,7 @@ enum class ExprKind : bool { Dimension = false, Level = true }; constexpr VarKind getVarKindAllowedInExpr(ExprKind ek) { using VK = std::underlying_type_t; - return VarKind{2 * static_cast(!to_underlying(ek))}; + return VarKind{2 * static_cast(!llvm::to_underlying(ek))}; } static_assert(getVarKindAllowedInExpr(ExprKind::Dimension) == VarKind::Level && getVarKindAllowedInExpr(ExprKind::Level) == VarKind::Dimension); diff --git a/mlir/lib/Dialect/SparseTensor/IR/Detail/TemplateExtras.h 
b/mlir/lib/Dialect/SparseTensor/IR/Detail/TemplateExtras.h index 7f0c1fd8c46c78..383fbcb8dc5205 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/Detail/TemplateExtras.h +++ b/mlir/lib/Dialect/SparseTensor/IR/Detail/TemplateExtras.h @@ -37,13 +37,6 @@ operator<<(llvm::raw_ostream &os, T const &t) { return os; } -//===----------------------------------------------------------------------===// -/// Convert an enum to its underlying type. -template -constexpr std::underlying_type_t to_underlying(Enum e) noexcept { - return static_cast>(e); -} - //===----------------------------------------------------------------------===// template static constexpr bool IsZeroCostAbstraction = diff --git a/mlir/lib/Dialect/SparseTensor/IR/Detail/Var.h b/mlir/lib/Dialect/SparseTensor/IR/Detail/Var.h index 2606dd399eec81..81f480187c059e 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/Detail/Var.h +++ b/mlir/lib/Dialect/SparseTensor/IR/Detail/Var.h @@ -13,6 +13,7 @@ #include "mlir/IR/OpImplementation.h" #include "llvm/ADT/EnumeratedArray.h" +#include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/StringMap.h" @@ -31,13 +32,13 @@ namespace ir_detail { enum class VarKind { Symbol = 1, Dimension = 0, Level = 2 }; [[nodiscard]] constexpr bool isWF(VarKind vk) { - const auto vk_ = to_underlying(vk); + const auto vk_ = llvm::to_underlying(vk); return 0 <= vk_ && vk_ <= 2; } /// Swaps `Dimension` and `Level`, but leaves `Symbol` the same. constexpr VarKind flipVarKind(VarKind vk) { - return VarKind{2 - to_underlying(vk)}; + return VarKind{2 - llvm::to_underlying(vk)}; } static_assert(flipVarKind(VarKind::Symbol) == VarKind::Symbol && flipVarKind(VarKind::Dimension) == VarKind::Level && @@ -49,7 +50,7 @@ constexpr char toChar(VarKind vk) { // in the range [-44..126] (where that lower bound is under worst-case // rearranging of the expression); and `int_fast8_t` is the fastest type // which can support that range without over-/underflow. - const auto vk_ = static_cast(to_underlying(vk)); + const auto vk_ = static_cast(llvm::to_underlying(vk)); return static_cast(100 + vk_ * (26 - vk_ * 11)); } static_assert(toChar(VarKind::Symbol) == 's' && @@ -100,7 +101,7 @@ class Var { public: constexpr Impl(VarKind vk, Num n) : data((static_cast(n) << 2) | - static_cast(to_underlying(vk))) { + static_cast(llvm::to_underlying(vk))) { assert(isWF(vk) && "unknown VarKind"); assert(isWF_Num(n) && "Var::Num is too large"); } @@ -215,7 +216,7 @@ class Ranks final { static constexpr unsigned to_index(VarKind vk) { assert(isWF(vk) && "unknown VarKind"); - return static_cast(to_underlying(vk)); + return static_cast(llvm::to_underlying(vk)); } public: @@ -349,7 +350,7 @@ class VarEnv final { /// to live too long. VarInfo const &access(VarInfo::ID id) const { // `SmallVector::operator[]` already asserts the index is in-bounds. - return vars[to_underlying(id)]; + return vars[llvm::to_underlying(id)]; } VarInfo const *access(std::optional oid) const { return oid ? &access(*oid) : nullptr; From 04dd2ac03a568af157f465ec1242ce8bd14e0dcf Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Mon, 30 Oct 2023 15:46:36 -0400 Subject: [PATCH 069/144] [RISCV][GlobalISel] Select G_GLOBAL_VALUE (#70091) G_GLOBAL_VALUE should be lowered into an absolute address if `-codemodel=small` is used or into a PC-relative if `-codemodel=medium` is used. PR #68380 tried to create special instructions to do this, but I don't see why we need to do that. 
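
As a reference for reviewers, the instruction sequences the two code models
are expected to produce for the address of a global `g` (a sketch only; the
symbol, label, and register choices are illustrative, not taken from this
patch's tests):

  # -code-model=small: absolute address, (addi (lui %hi(g)) %lo(g)),
  # valid only within a single +/-2 GiB address range.
  lui   a0, %hi(g)
  addi  a0, a0, %lo(g)

  # -code-model=medium: PC-relative via PseudoLLA, kept as a pseudo and
  # expanded after selection so %pcrel_lo can reference the label of the
  # auipc instruction itself.
  .Lpcrel_hi0:
  auipc a0, %pcrel_hi(g)
  addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)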
--- .../CodeGen/GlobalISel/InstructionSelector.h | 8 ++ .../CodeGen/GlobalISel/InstructionSelect.cpp | 2 + .../RISCV/GISel/RISCVInstructionSelector.cpp | 127 +++++++++++++++++ .../instruction-select/global-value32.mir | 129 ++++++++++++++++++ .../instruction-select/global-value64.mir | 128 +++++++++++++++++ 5 files changed, 394 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/global-value32.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/global-value64.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 1662136cfa94af..8331cb58a0991a 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -31,6 +31,14 @@ class InstructionSelector : public GIMatchTableExecutor { /// for I in all mutated/inserted instructions: /// !isPreISelGenericOpcode(I.getOpcode()) virtual bool select(MachineInstr &I) = 0; + + void setTargetPassConfig(const TargetPassConfig *T) { TPC = T; } + + void setRemarkEmitter(MachineOptimizationRemarkEmitter *M) { MORE = M; } + +protected: + const TargetPassConfig *TPC = nullptr; + MachineOptimizationRemarkEmitter *MORE = nullptr; }; } // namespace llvm diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 75f1fbc3b2d1b3..baea773cf528e9 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -90,6 +90,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { const TargetPassConfig &TPC = getAnalysis(); InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); + ISel->setTargetPassConfig(&TPC); CodeGenOptLevel OldOptLevel = OptLevel; auto RestoreOptLevel = make_scope_exit([=]() { OptLevel = OldOptLevel; }); @@ -109,6 +110,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // An optimization remark emitter. Used to report failures. MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); + ISel->setRemarkEmitter(&MORE); // FIXME: There are many other MF/MFI fields we need to initialize. diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index bd3662c942de88..b03be71ed7b2a6 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -63,6 +63,8 @@ class RISCVInstructionSelector : public InstructionSelector { bool selectCopy(MachineInstr &MI, MachineRegisterInfo &MRI) const; bool selectConstant(MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const; + bool selectGlobalValue(MachineInstr &MI, MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) const; bool selectSExtInreg(MachineInstr &MI, MachineIRBuilder &MIB) const; bool selectSelect(MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const; @@ -98,6 +100,7 @@ class RISCVInstructionSelector : public InstructionSelector { const RISCVInstrInfo &TII; const RISCVRegisterInfo &TRI; const RISCVRegisterBankInfo &RBI; + const RISCVTargetMachine &TM; // FIXME: This is necessary because DAGISel uses "Subtarget->" and GlobalISel // uses "STI." in the code generated by TableGen. 
We need to unify the name of @@ -123,6 +126,7 @@ RISCVInstructionSelector::RISCVInstructionSelector( const RISCVTargetMachine &TM, const RISCVSubtarget &STI, const RISCVRegisterBankInfo &RBI) : STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), + TM(TM), #define GET_GLOBALISEL_PREDICATES_INIT #include "RISCVGenGlobalISel.inc" @@ -346,6 +350,8 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) { return selectCopy(MI, MRI); case TargetOpcode::G_CONSTANT: return selectConstant(MI, MIB, MRI); + case TargetOpcode::G_GLOBAL_VALUE: + return selectGlobalValue(MI, MIB, MRI); case TargetOpcode::G_BRCOND: { // TODO: Fold with G_ICMP. auto Bcc = @@ -543,6 +549,127 @@ bool RISCVInstructionSelector::selectConstant(MachineInstr &MI, return true; } +bool RISCVInstructionSelector::selectGlobalValue( + MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const { + assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE && + "Expected G_GLOBAL_VALUE"); + + auto *GV = MI.getOperand(1).getGlobal(); + if (GV->isThreadLocal()) { + // TODO: implement this case. + return false; + } + + Register DefReg = MI.getOperand(0).getReg(); + const LLT DefTy = MRI.getType(DefReg); + MachineInstr *Result = nullptr; + + // When HWASAN is used and tagging of global variables is enabled + // they should be accessed via the GOT, since the tagged address of a global + // is incompatible with existing code models. This also applies to non-pic + // mode. + if (TM.isPositionIndependent() || Subtarget->allowTaggedGlobals()) { + if (GV->isDSOLocal() && !Subtarget->allowTaggedGlobals()) { + // Use PC-relative addressing to access the symbol. This generates the + // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) + // %pcrel_lo(auipc)). + Result = MIB.buildInstr(RISCV::PseudoLLA) + .addDef(DefReg) + .addGlobalAddress(GV, 0); + } else { + // Use PC-relative addressing to access the GOT for this symbol, then + // load the address from the GOT. This generates the pattern (PseudoLGA + // sym), which expands to (ld (addi (auipc %got_pcrel_hi(sym)) + // %pcrel_lo(auipc))). + MachineFunction &MF = *MI.getParent()->getParent(); + MachineMemOperand *MemOp = MF.getMachineMemOperand( + MachinePointerInfo::getGOT(MF), + MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant, + DefTy, Align(DefTy.getSizeInBits() / 8)); + + Result = MIB.buildInstr(RISCV::PseudoLGA) + .addDef(DefReg) + .addGlobalAddress(GV, 0) + .addMemOperand(MemOp); + } + + if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI)) + return false; + + MI.eraseFromParent(); + return true; + } + + switch (TM.getCodeModel()) { + default: { + reportGISelFailure(const_cast(*MF), *TPC, *MORE, + getName(), "Unsupported code model for lowering", MI); + return false; + } + case CodeModel::Small: { + // Must lie within a single 2 GiB address range and must lie between + // absolute addresses -2 GiB and +2 GiB. This generates the pattern (addi + // (lui %hi(sym)) %lo(sym)). 
+ Register AddrHiDest = MRI.createVirtualRegister(&RISCV::GPRRegClass); + MachineInstr *AddrHi = MIB.buildInstr(RISCV::LUI) + .addDef(AddrHiDest) + .addGlobalAddress(GV, RISCVII::MO_HI); + + if (!constrainSelectedInstRegOperands(*AddrHi, TII, TRI, RBI)) + return false; + + Result = MIB.buildInstr(RISCV::ADDI) + .addDef(DefReg) + .addReg(AddrHiDest) + .addGlobalAddress(GV, 0, RISCVII::MO_LO); + + if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI)) + return false; + + MI.eraseFromParent(); + return true; + } + case CodeModel::Medium: { + // Emit LGA/LLA instead of the sequence it expands to because the pcrel_lo + // relocation needs to reference a label that points to the auipc + // instruction itself, not the global. This cannot be done inside the + // instruction selector. + if (GV->hasExternalWeakLinkage()) { + // An extern weak symbol may be undefined, i.e. have value 0, which may + // not be within 2GiB of PC, so use GOT-indirect addressing to access the + // symbol. This generates the pattern (PseudoLGA sym), which expands to + // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). + MachineFunction &MF = *MI.getParent()->getParent(); + MachineMemOperand *MemOp = MF.getMachineMemOperand( + MachinePointerInfo::getGOT(MF), + MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant, + DefTy, Align(DefTy.getSizeInBits() / 8)); + + Result = MIB.buildInstr(RISCV::PseudoLGA) + .addDef(DefReg) + .addGlobalAddress(GV, 0) + .addMemOperand(MemOp); + } else { + // Generate a sequence for accessing addresses within any 2GiB range + // within the address space. This generates the pattern (PseudoLLA sym), + // which expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). + Result = MIB.buildInstr(RISCV::PseudoLLA) + .addDef(DefReg) + .addGlobalAddress(GV, 0); + } + + if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI)) + return false; + + MI.eraseFromParent(); + return true; + } + } + return false; +} + bool RISCVInstructionSelector::selectSExtInreg(MachineInstr &MI, MachineIRBuilder &MIB) const { if (!STI.isRV64()) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/global-value32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/global-value32.mir new file mode 100644 index 00000000000000..bbc7bc893e682c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/global-value32.mir @@ -0,0 +1,129 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -mtriple=riscv32 -run-pass=instruction-select -relocation-model=pic \ +# RUN: %s -o - | FileCheck --check-prefix=RV32-PIE %s +# RUN: llc -mtriple=riscv32 -run-pass=instruction-select \ +# RUN: -mattr=+tagged-globals %s -o - | FileCheck \ +# RUN: --check-prefix=RV32-NOPIE-TAG %s +# RUN: llc -mtriple=riscv32 -run-pass=instruction-select -code-model=small \ +# RUN: %s -o - | FileCheck --check-prefix=RV32-SMALL-NOPIE-NOTAG %s +# RUN: llc -mtriple=riscv32 -run-pass=instruction-select -code-model=medium \ +# RUN: %s -o - | FileCheck --check-prefix=RV32-MED %s + + +--- | + @x = global i32 0, align 4 + define ptr @global_addr() { + entry: + ret ptr @x + } + @y = extern_weak global i32, align 4 + define ptr @extern_weak_global_addr() { + entry: + ret ptr @y + } + @z = dso_local global i32 0, align 4 + define ptr @local_global_addr() { + entry: + ret ptr @z + } +... 
+--- +name: global_addr +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gprb, preferred-register: '' } +body: | + bb.1.entry: + ; RV32-PIE-LABEL: name: global_addr + ; RV32-PIE: [[PseudoLGA:%[0-9]+]]:gpr = PseudoLGA @x :: (dereferenceable invariant load (p0) from got) + ; RV32-PIE-NEXT: $x10 = COPY [[PseudoLGA]] + ; RV32-PIE-NEXT: PseudoRET implicit $x10 + ; + ; RV32-NOPIE-TAG-LABEL: name: global_addr + ; RV32-NOPIE-TAG: [[PseudoLGA:%[0-9]+]]:gpr = PseudoLGA @x :: (dereferenceable invariant load (p0) from got) + ; RV32-NOPIE-TAG-NEXT: $x10 = COPY [[PseudoLGA]] + ; RV32-NOPIE-TAG-NEXT: PseudoRET implicit $x10 + ; + ; RV32-SMALL-NOPIE-NOTAG-LABEL: name: global_addr + ; RV32-SMALL-NOPIE-NOTAG: [[LUI:%[0-9]+]]:gpr = LUI @x + 4 + ; RV32-SMALL-NOPIE-NOTAG-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LUI]], target-flags(riscv-lo) @x + ; RV32-SMALL-NOPIE-NOTAG-NEXT: $x10 = COPY [[ADDI]] + ; RV32-SMALL-NOPIE-NOTAG-NEXT: PseudoRET implicit $x10 + ; + ; RV32-MED-LABEL: name: global_addr + ; RV32-MED: [[PseudoLLA:%[0-9]+]]:gpr = PseudoLLA @x + ; RV32-MED-NEXT: $x10 = COPY [[PseudoLLA]] + ; RV32-MED-NEXT: PseudoRET implicit $x10 + %0:gprb(p0) = G_GLOBAL_VALUE @x + $x10 = COPY %0(p0) + PseudoRET implicit $x10 +... +--- +name: extern_weak_global_addr +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gprb, preferred-register: '' } +body: | + bb.1.entry: + ; RV32-PIE-LABEL: name: extern_weak_global_addr + ; RV32-PIE: [[PseudoLGA:%[0-9]+]]:gpr = PseudoLGA @y :: (dereferenceable invariant load (p0) from got) + ; RV32-PIE-NEXT: $x10 = COPY [[PseudoLGA]] + ; RV32-PIE-NEXT: PseudoRET implicit $x10 + ; + ; RV32-NOPIE-TAG-LABEL: name: extern_weak_global_addr + ; RV32-NOPIE-TAG: [[PseudoLGA:%[0-9]+]]:gpr = PseudoLGA @y :: (dereferenceable invariant load (p0) from got) + ; RV32-NOPIE-TAG-NEXT: $x10 = COPY [[PseudoLGA]] + ; RV32-NOPIE-TAG-NEXT: PseudoRET implicit $x10 + ; + ; RV32-SMALL-NOPIE-NOTAG-LABEL: name: extern_weak_global_addr + ; RV32-SMALL-NOPIE-NOTAG: [[LUI:%[0-9]+]]:gpr = LUI @y + 4 + ; RV32-SMALL-NOPIE-NOTAG-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LUI]], target-flags(riscv-lo) @y + ; RV32-SMALL-NOPIE-NOTAG-NEXT: $x10 = COPY [[ADDI]] + ; RV32-SMALL-NOPIE-NOTAG-NEXT: PseudoRET implicit $x10 + ; + ; RV32-MED-LABEL: name: extern_weak_global_addr + ; RV32-MED: [[PseudoLGA:%[0-9]+]]:gpr = PseudoLGA @y :: (dereferenceable invariant load (p0) from got) + ; RV32-MED-NEXT: $x10 = COPY [[PseudoLGA]] + ; RV32-MED-NEXT: PseudoRET implicit $x10 + %0:gprb(p0) = G_GLOBAL_VALUE @y + $x10 = COPY %0(p0) + PseudoRET implicit $x10 +... 
+--- +name: local_global_addr +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gprb, preferred-register: '' } +body: | + bb.1.entry: + ; RV32-PIE-LABEL: name: local_global_addr + ; RV32-PIE: [[PseudoLLA:%[0-9]+]]:gpr = PseudoLLA @z + ; RV32-PIE-NEXT: $x10 = COPY [[PseudoLLA]] + ; RV32-PIE-NEXT: PseudoRET implicit $x10 + ; + ; RV32-NOPIE-TAG-LABEL: name: local_global_addr + ; RV32-NOPIE-TAG: [[PseudoLGA:%[0-9]+]]:gpr = PseudoLGA @z :: (dereferenceable invariant load (p0) from got) + ; RV32-NOPIE-TAG-NEXT: $x10 = COPY [[PseudoLGA]] + ; RV32-NOPIE-TAG-NEXT: PseudoRET implicit $x10 + ; + ; RV32-SMALL-NOPIE-NOTAG-LABEL: name: local_global_addr + ; RV32-SMALL-NOPIE-NOTAG: [[LUI:%[0-9]+]]:gpr = LUI @z + 4 + ; RV32-SMALL-NOPIE-NOTAG-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LUI]], target-flags(riscv-lo) @z + ; RV32-SMALL-NOPIE-NOTAG-NEXT: $x10 = COPY [[ADDI]] + ; RV32-SMALL-NOPIE-NOTAG-NEXT: PseudoRET implicit $x10 + ; + ; RV32-MED-LABEL: name: local_global_addr + ; RV32-MED: [[PseudoLLA:%[0-9]+]]:gpr = PseudoLLA @z + ; RV32-MED-NEXT: $x10 = COPY [[PseudoLLA]] + ; RV32-MED-NEXT: PseudoRET implicit $x10 + %0:gprb(p0) = G_GLOBAL_VALUE @z + $x10 = COPY %0(p0) + PseudoRET implicit $x10 +... + diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/global-value64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/global-value64.mir new file mode 100644 index 00000000000000..440afaab7b28ef --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/global-value64.mir @@ -0,0 +1,128 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -mtriple=riscv64 -run-pass=instruction-select -relocation-model=pic \ +# RUN: %s -o - | FileCheck --check-prefix=RV64-PIE %s +# RUN: llc -mtriple=riscv64 -run-pass=instruction-select \ +# RUN: -mattr=+tagged-globals %s -o - | FileCheck \ +# RUN: --check-prefix=RV64-NOPIE-TAG %s +# RUN: llc -mtriple=riscv64 -run-pass=instruction-select -code-model=small \ +# RUN: %s -o - | FileCheck --check-prefix=RV64-SMALL-NOPIE-NOTAG %s +# RUN: llc -mtriple=riscv64 -run-pass=instruction-select -code-model=medium \ +# RUN: %s -o - | FileCheck --check-prefix=RV64-MED %s + +--- | + @x = global i32 0, align 4 + define ptr @global_addr() { + entry: + ret ptr @x + } + @y = extern_weak global i32, align 4 + define ptr @extern_weak_global_addr() { + entry: + ret ptr @y + } + @z = dso_local global i32 0, align 4 + define ptr @local_global_addr() { + entry: + ret ptr @z + } +... 
+--- +name: global_addr +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gprb, preferred-register: '' } +body: | + bb.1.entry: + ; RV64-PIE-LABEL: name: global_addr + ; RV64-PIE: [[PseudoLGA:%[0-9]+]]:gpr = PseudoLGA @x :: (dereferenceable invariant load (p0) from got) + ; RV64-PIE-NEXT: $x10 = COPY [[PseudoLGA]] + ; RV64-PIE-NEXT: PseudoRET implicit $x10 + ; + ; RV64-NOPIE-TAG-LABEL: name: global_addr + ; RV64-NOPIE-TAG: [[PseudoLGA:%[0-9]+]]:gpr = PseudoLGA @x :: (dereferenceable invariant load (p0) from got) + ; RV64-NOPIE-TAG-NEXT: $x10 = COPY [[PseudoLGA]] + ; RV64-NOPIE-TAG-NEXT: PseudoRET implicit $x10 + ; + ; RV64-SMALL-NOPIE-NOTAG-LABEL: name: global_addr + ; RV64-SMALL-NOPIE-NOTAG: [[LUI:%[0-9]+]]:gpr = LUI @x + 4 + ; RV64-SMALL-NOPIE-NOTAG-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LUI]], target-flags(riscv-lo) @x + ; RV64-SMALL-NOPIE-NOTAG-NEXT: $x10 = COPY [[ADDI]] + ; RV64-SMALL-NOPIE-NOTAG-NEXT: PseudoRET implicit $x10 + ; + ; RV64-MED-LABEL: name: global_addr + ; RV64-MED: [[PseudoLLA:%[0-9]+]]:gpr = PseudoLLA @x + ; RV64-MED-NEXT: $x10 = COPY [[PseudoLLA]] + ; RV64-MED-NEXT: PseudoRET implicit $x10 + %0:gprb(p0) = G_GLOBAL_VALUE @x + $x10 = COPY %0(p0) + PseudoRET implicit $x10 +... +--- +name: extern_weak_global_addr +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gprb, preferred-register: '' } +body: | + bb.1.entry: + ; RV64-PIE-LABEL: name: extern_weak_global_addr + ; RV64-PIE: [[PseudoLGA:%[0-9]+]]:gpr = PseudoLGA @y :: (dereferenceable invariant load (p0) from got) + ; RV64-PIE-NEXT: $x10 = COPY [[PseudoLGA]] + ; RV64-PIE-NEXT: PseudoRET implicit $x10 + ; + ; RV64-NOPIE-TAG-LABEL: name: extern_weak_global_addr + ; RV64-NOPIE-TAG: [[PseudoLGA:%[0-9]+]]:gpr = PseudoLGA @y :: (dereferenceable invariant load (p0) from got) + ; RV64-NOPIE-TAG-NEXT: $x10 = COPY [[PseudoLGA]] + ; RV64-NOPIE-TAG-NEXT: PseudoRET implicit $x10 + ; + ; RV64-SMALL-NOPIE-NOTAG-LABEL: name: extern_weak_global_addr + ; RV64-SMALL-NOPIE-NOTAG: [[LUI:%[0-9]+]]:gpr = LUI @y + 4 + ; RV64-SMALL-NOPIE-NOTAG-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LUI]], target-flags(riscv-lo) @y + ; RV64-SMALL-NOPIE-NOTAG-NEXT: $x10 = COPY [[ADDI]] + ; RV64-SMALL-NOPIE-NOTAG-NEXT: PseudoRET implicit $x10 + ; + ; RV64-MED-LABEL: name: extern_weak_global_addr + ; RV64-MED: [[PseudoLGA:%[0-9]+]]:gpr = PseudoLGA @y :: (dereferenceable invariant load (p0) from got) + ; RV64-MED-NEXT: $x10 = COPY [[PseudoLGA]] + ; RV64-MED-NEXT: PseudoRET implicit $x10 + %0:gprb(p0) = G_GLOBAL_VALUE @y + $x10 = COPY %0(p0) + PseudoRET implicit $x10 +... 
+--- +name: local_global_addr +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gprb, preferred-register: '' } +body: | + bb.1.entry: + ; RV64-PIE-LABEL: name: local_global_addr + ; RV64-PIE: [[PseudoLLA:%[0-9]+]]:gpr = PseudoLLA @z + ; RV64-PIE-NEXT: $x10 = COPY [[PseudoLLA]] + ; RV64-PIE-NEXT: PseudoRET implicit $x10 + ; + ; RV64-NOPIE-TAG-LABEL: name: local_global_addr + ; RV64-NOPIE-TAG: [[PseudoLGA:%[0-9]+]]:gpr = PseudoLGA @z :: (dereferenceable invariant load (p0) from got) + ; RV64-NOPIE-TAG-NEXT: $x10 = COPY [[PseudoLGA]] + ; RV64-NOPIE-TAG-NEXT: PseudoRET implicit $x10 + ; + ; RV64-SMALL-NOPIE-NOTAG-LABEL: name: local_global_addr + ; RV64-SMALL-NOPIE-NOTAG: [[LUI:%[0-9]+]]:gpr = LUI @z + 4 + ; RV64-SMALL-NOPIE-NOTAG-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LUI]], target-flags(riscv-lo) @z + ; RV64-SMALL-NOPIE-NOTAG-NEXT: $x10 = COPY [[ADDI]] + ; RV64-SMALL-NOPIE-NOTAG-NEXT: PseudoRET implicit $x10 + ; + ; RV64-MED-LABEL: name: local_global_addr + ; RV64-MED: [[PseudoLLA:%[0-9]+]]:gpr = PseudoLLA @z + ; RV64-MED-NEXT: $x10 = COPY [[PseudoLLA]] + ; RV64-MED-NEXT: PseudoRET implicit $x10 + %0:gprb(p0) = G_GLOBAL_VALUE @z + $x10 = COPY %0(p0) + PseudoRET implicit $x10 +... + From 093bc6b61a6c87e138a4bf89fe620f6e63d20eda Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Mon, 30 Oct 2023 15:47:45 -0400 Subject: [PATCH 070/144] [RISCV] SiFive7 VLDS Sched should not depend on VL when stride is x0. (#70266) When stride is x0, a strided load should behave like a unit stride load, which uses the VLDE sched class. --------- Co-authored-by: Wang Pengcheng --- llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 34 ++++- llvm/lib/Target/RISCV/RISCVScheduleV.td | 42 ++++++ .../llvm-mca/RISCV/SiFive7/strided-load-x0.s | 125 ++++++++++++++++++ 3 files changed, 194 insertions(+), 7 deletions(-) create mode 100644 llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-x0.s diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index d2447cf23e266c..9da68dc9a139d3 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -455,11 +455,19 @@ foreach mx = SchedMxList in { // specific suffixes, but since SEW is already encoded in the name of the // resource, we do not need to use LMULSEWXXX constructors. However, we do // use the SEW from the name to determine the number of Cycles. + +// This predicate is true when the rs2 operand of vlse or vsse is x0, false +// otherwise. +def VLDSX0Pred : MCSchedPredicate>; + foreach mx = SchedMxList in { + defvar VLDSX0Cycles = SiFive7GetCyclesDefault.c; defvar Cycles = SiFive7GetCyclesOnePerElement.c; defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VL], + 4, [VLDSX0Cycles], !add(3, Cycles), + [Cycles], mx, IsWorstCase>; let Latency = !add(3, Cycles), ReleaseAtCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVLDS8", [SiFive7VL], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VL], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VL], mx, IsWorstCase>; } @@ -469,11 +477,17 @@ foreach mx = SchedMxList in { defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VS], mx, IsWorstCase>; } } -foreach mx = SchedMxList in { +// TODO: The MxLists need to be filtered by EEW. We only need to support +// LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8 +// since LMUL >= 16/64. 
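+// (With ELEN=64: for EEW=16, LMUL >= 16/64 = 1/4, so the list below starts
+// at MF4; the EEW=32 and EEW=64 lists below likewise start at MF2 and M1.)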
+foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in { + defvar VLDSX0Cycles = SiFive7GetCyclesDefault.c; defvar Cycles = SiFive7GetCyclesOnePerElement.c; defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VL], + 4, [VLDSX0Cycles], !add(3, Cycles), + [Cycles], mx, IsWorstCase>; let Latency = !add(3, Cycles), ReleaseAtCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVLDS16", [SiFive7VL], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VL], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VL], mx, IsWorstCase>; } @@ -483,11 +497,14 @@ foreach mx = SchedMxList in { defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VS], mx, IsWorstCase>; } } -foreach mx = SchedMxList in { +foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in { + defvar VLDSX0Cycles = SiFive7GetCyclesDefault.c; defvar Cycles = SiFive7GetCyclesOnePerElement.c; defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VL], + 4, [VLDSX0Cycles], !add(3, Cycles), + [Cycles], mx, IsWorstCase>; let Latency = !add(3, Cycles), ReleaseAtCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVLDS32", [SiFive7VL], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VL], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VL], mx, IsWorstCase>; } @@ -497,11 +514,14 @@ foreach mx = SchedMxList in { defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VS], mx, IsWorstCase>; } } -foreach mx = SchedMxList in { +foreach mx = ["M1", "M2", "M4", "M8"] in { + defvar VLDSX0Cycles = SiFive7GetCyclesDefault.c; defvar Cycles = SiFive7GetCyclesOnePerElement.c; defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VL], + 4, [VLDSX0Cycles], !add(3, Cycles), + [Cycles], mx, IsWorstCase>; let Latency = !add(3, Cycles), ReleaseAtCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVLDS64", [SiFive7VL], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VL], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VL], mx, IsWorstCase>; } diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td index 7af7716c96b856..b5ddb8197993bf 100644 --- a/llvm/lib/Target/RISCV/RISCVScheduleV.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -62,6 +62,48 @@ multiclass LMULSEWWriteResMXSEW resources, def : WriteRes(name # "_WorstCase"), resources>; } +// Define a SchedAlias for the SchedWrite associated with (name, mx) whose +// behavior is aliased to a Variant. The Variant has Latency predLad and +// ReleaseAtCycles predCycles if the SchedPredicate Pred is true, otherwise has +// Latency noPredLat and ReleaseAtCycles noPredCycles. The WorstCase SchedWrite +// is created similiarly if IsWorstCase is true. 
+multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred,
+                                 list<ProcResourceKind> resources,
+                                 int predLat, list<int> predCycles,
+                                 int noPredLat, list<int> noPredCycles,
+                                 string mx, bit IsWorstCase> {
+  defvar nameMX = name # "_" # mx;
+
+  // Define the different behaviors
+  def NAME # nameMX # "_Pred" : SchedWriteRes<resources> {
+    let Latency = predLat;
+    let ReleaseAtCycles = predCycles;
+  }
+  def NAME # nameMX # "_NoPred" : SchedWriteRes<resources> {
+    let Latency = noPredLat;
+    let ReleaseAtCycles = noPredCycles;
+  }
+
+  // Tie behavior to predicate
+  def NAME # nameMX # "_Variant" : SchedWriteVariant<[
+    SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>,
+    SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX # "_NoPred")]>
+  ]>;
+  def : SchedAlias<
+    !cast<SchedWrite>(nameMX),
+    !cast<SchedWrite>(NAME # nameMX # "_Variant")>;
+
+  if IsWorstCase then {
+    def NAME # name # "_WorstCase_Variant" : SchedWriteVariant<[
+      SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>,
+      SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX # "_NoPred")]>
+    ]>;
+    def : SchedAlias<
+      !cast<SchedWrite>(name # "_WorstCase"),
+      !cast<SchedWrite>(NAME # name # "_WorstCase_Variant")>;
+  }
+}
+
 // Define multiclasses to define SchedWrite, SchedRead,  WriteRes, and
 // ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the
 // SchedMxList variants above. Each multiclass is responsible for defining
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-x0.s b/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-x0.s
new file mode 100644
index 00000000000000..8b52d0ece63593
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-x0.s
@@ -0,0 +1,125 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -debug -mtriple=riscv64 -mcpu=sifive-x280 -iterations=1 < %s | FileCheck %s
+
+vsetvli zero, zero, e32, m1, tu, mu
+
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vlse8.v v1, (a1), zero
+vlse16.v v1, (a1), zero
+vlse32.v v1, (a1), zero
+vlse64.v v1, (a1), zero
+
+vle8.v v1, (a1)
+vle16.v v1, (a1)
+vle32.v v1, (a1)
+vle64.v v1, (a1)
+
+vsetvli zero, zero, e64, m1, tu, mu
+
+vlse8.v v1, (a1), a2
+vlse16.v v1, (a1), a2
+vlse32.v v1, (a1), a2
+vlse64.v v1, (a1), a2
+
+vlse8.v v1, (a1), zero
+vlse16.v v1, (a1), zero
+vlse32.v v1, (a1), zero
+vlse64.v v1, (a1), zero
+
+vle8.v v1, (a1)
+vle16.v v1, (a1)
+vle32.v v1, (a1)
+vle64.v v1, (a1)
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 26
+# CHECK-NEXT: Total Cycles: 3523
+# CHECK-NEXT: Total uOps: 26
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.01
+# CHECK-NEXT: IPC: 0.01
+# CHECK-NEXT: Block RThroughput: 3517.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e32, m1, tu, mu
+# CHECK-NEXT: 1 515 512.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT: 1 259 256.00 * vlse16.v v1, (a1), a2
+# CHECK-NEXT: 1 19 16.00 * vlse32.v v1, (a1), a2
+# CHECK-NEXT: 1 67 64.00 * vlse64.v v1, (a1), a2
+# CHECK-NEXT: 1 515 512.00 * vlse8.v v1, (a1), zero
+# CHECK-NEXT: 1 259 256.00 * vlse16.v v1, (a1), zero
+# CHECK-NEXT: 1 19 16.00 * vlse32.v v1, (a1), zero
+# CHECK-NEXT: 1 67 64.00 * vlse64.v v1, (a1), zero
+# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a1)
+# CHECK-NEXT: 1 4 1.00 * vle16.v v1, (a1)
+# CHECK-NEXT: 1 4 2.00 * vle32.v v1, (a1)
+# CHECK-NEXT: 1 4 4.00 * vle64.v v1, (a1)
+# CHECK-NEXT: 1 3 1.00 U vsetvli zero, zero, e64, m1, tu, mu
+# CHECK-NEXT: 1 515 512.00 * vlse8.v v1, (a1), a2
+# CHECK-NEXT:
1 259 256.00 * vlse16.v v1, (a1), a2 +# CHECK-NEXT: 1 131 128.00 * vlse32.v v1, (a1), a2 +# CHECK-NEXT: 1 11 8.00 * vlse64.v v1, (a1), a2 +# CHECK-NEXT: 1 515 512.00 * vlse8.v v1, (a1), zero +# CHECK-NEXT: 1 259 256.00 * vlse16.v v1, (a1), zero +# CHECK-NEXT: 1 131 128.00 * vlse32.v v1, (a1), zero +# CHECK-NEXT: 1 11 8.00 * vlse64.v v1, (a1), zero +# CHECK-NEXT: 1 4 1.00 * vle8.v v1, (a1) +# CHECK-NEXT: 1 4 1.00 * vle16.v v1, (a1) +# CHECK-NEXT: 1 4 1.00 * vle32.v v1, (a1) +# CHECK-NEXT: 1 4 2.00 * vle64.v v1, (a1) + +# CHECK: Resources: +# CHECK-NEXT: [0] - SiFive7FDiv +# CHECK-NEXT: [1] - SiFive7IDiv +# CHECK-NEXT: [2] - SiFive7PipeA +# CHECK-NEXT: [3] - SiFive7PipeB +# CHECK-NEXT: [4] - SiFive7PipeV +# CHECK-NEXT: [5] - SiFive7VA +# CHECK-NEXT: [6] - SiFive7VL +# CHECK-NEXT: [7] - SiFive7VS + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] +# CHECK-NEXT: - - 2.00 - 3517.00 - 3517.00 - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e32, m1, tu, mu +# CHECK-NEXT: - - - - 512.00 - 512.00 - vlse8.v v1, (a1), a2 +# CHECK-NEXT: - - - - 256.00 - 256.00 - vlse16.v v1, (a1), a2 +# CHECK-NEXT: - - - - 16.00 - 16.00 - vlse32.v v1, (a1), a2 +# CHECK-NEXT: - - - - 64.00 - 64.00 - vlse64.v v1, (a1), a2 +# CHECK-NEXT: - - - - 512.00 - 512.00 - vlse8.v v1, (a1), zero +# CHECK-NEXT: - - - - 256.00 - 256.00 - vlse16.v v1, (a1), zero +# CHECK-NEXT: - - - - 16.00 - 16.00 - vlse32.v v1, (a1), zero +# CHECK-NEXT: - - - - 64.00 - 64.00 - vlse64.v v1, (a1), zero +# CHECK-NEXT: - - - - 1.00 - 1.00 - vle8.v v1, (a1) +# CHECK-NEXT: - - - - 1.00 - 1.00 - vle16.v v1, (a1) +# CHECK-NEXT: - - - - 2.00 - 2.00 - vle32.v v1, (a1) +# CHECK-NEXT: - - - - 4.00 - 4.00 - vle64.v v1, (a1) +# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, zero, e64, m1, tu, mu +# CHECK-NEXT: - - - - 512.00 - 512.00 - vlse8.v v1, (a1), a2 +# CHECK-NEXT: - - - - 256.00 - 256.00 - vlse16.v v1, (a1), a2 +# CHECK-NEXT: - - - - 128.00 - 128.00 - vlse32.v v1, (a1), a2 +# CHECK-NEXT: - - - - 8.00 - 8.00 - vlse64.v v1, (a1), a2 +# CHECK-NEXT: - - - - 512.00 - 512.00 - vlse8.v v1, (a1), zero +# CHECK-NEXT: - - - - 256.00 - 256.00 - vlse16.v v1, (a1), zero +# CHECK-NEXT: - - - - 128.00 - 128.00 - vlse32.v v1, (a1), zero +# CHECK-NEXT: - - - - 8.00 - 8.00 - vlse64.v v1, (a1), zero +# CHECK-NEXT: - - - - 1.00 - 1.00 - vle8.v v1, (a1) +# CHECK-NEXT: - - - - 1.00 - 1.00 - vle16.v v1, (a1) +# CHECK-NEXT: - - - - 1.00 - 1.00 - vle32.v v1, (a1) +# CHECK-NEXT: - - - - 2.00 - 2.00 - vle64.v v1, (a1) From 651d88e3320472236d5d6e5806116c9bffd0f829 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Mon, 30 Oct 2023 15:58:33 -0400 Subject: [PATCH 071/144] [mlir][vector] Update reduction kind docs. NFC. (#70673) Update the documentation surrounding reduction kinds. Highlight different min/max reduction kinds for signed/unsigned integers and floats. Update IR examples. 
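For illustration, a minimal sketch of the renamed kinds on `vector.reduction`
(the operand names here are placeholders, not taken from the docs):

  %m = vector.reduction <minsi>, %iv : vector<8xi32> into i32  // signed int min
  %u = vector.reduction <minui>, %iv : vector<8xi32> into i32  // unsigned int min
  %f = vector.reduction <minf>, %fv : vector<8xf32> into f32   // float min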
--- .../mlir/Dialect/Vector/IR/VectorOps.td | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index 168ff45ca61542..62ae300b3cdc8d 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -86,8 +86,9 @@ def Vector_ContractionOp : An optional kind attribute may be used to specify the combining function between the intermediate result and accumulator argument of rank K. This - attribute can take the values add/mul/min/max for int/fp, and/or/xor for - int only. The default is "add". + attribute can take the values `add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui` + /`and`/`or`/`xor` for integers, and `add`/`mul`/`minf`/`maxf`/`minimumf` + /`maximumf` for floats. The default is `add`. Example: @@ -149,7 +150,7 @@ def Vector_ContractionOp : #contraction_trait = { indexing_maps = #contraction_accesses, iterator_types = ["reduction"], - kind = #vector.kind + kind = #vector.kind } %6 = vector.contract #contraction_trait %0, %1, %2 : vector<10xf32>, vector<10xf32> into f32 @@ -232,7 +233,8 @@ def Vector_ReductionOp : let summary = "reduction operation"; let description = [{ Reduces an 1-D vector "horizontally" into a scalar using the given - operation (add/mul/min/max for int/fp and and/or/xor for int only). + operation: `add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui`/`and`/`or`/`xor` for + integers, and `add`/`mul`/`minf`/`maxf`/`minimumf`/`maximumf` for floats. Reductions also allow an optional fused accumulator. Note that these operations are restricted to 1-D vectors to remain @@ -289,8 +291,9 @@ def Vector_MultiDimReductionOp : let summary = "Multi-dimensional reduction operation"; let description = [{ Reduces an n-D vector into an (n-k)-D vector (or a scalar when k == n) - using the given operation (add/mul/min/max for int/fp and and/or/xor for - int only). + using the given operation: `add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui` + /`and`/`or`/`xor` for integers, and `add`/`mul`/`minf`/`maxf`/`minimumf` + /`maximumf` for floats. Takes an initial accumulator operand. Example: @@ -937,11 +940,12 @@ def Vector_OuterProductOp : lowered to the LLVMIR dialect, this form emits `llvm.intr.fma`, which is guaranteed to lower to actual `fma` instructions on x86. - An optional kind attribute may be specified to be add/mul/min/max - for int/fp, and and/or/xor for int only. The default is "add", in which - case the operation returns a fused multiply-add. In other cases it returns - a multiply followed by the appropriate operation (for example, a compare and - select for "max"). + An optional kind attribute may be specified to be: `add`/`mul`/`minsi` + /`minui`/`maxsi`/`maxui`/`and`/`or`/`xor` for integers, and `add`/`mul` + /`minf`/`maxf`/`minimumf`/`maximumf` for floats. + The default is `add`, in which case the operation returns a fused + multiply-add. In other cases it returns a multiply followed by the + appropriate operation (for example, a compare and select for `maxf`). 
Example: @@ -953,7 +957,7 @@ def Vector_OuterProductOp : vector<4xf32>, vector<8xf32>, vector<4x8xf32> return %3: vector<4x8xf32> - %4 = vector.outerproduct %0, %1, %2 {kind = #vector.kind}: + %4 = vector.outerproduct %0, %1, %2 {kind = #vector.kind}: vector<4xf32>, vector<8xf32>, vector<4x8xf32> return %3: vector<4x8xf32> @@ -2764,10 +2768,10 @@ def Vector_ScanOp : let description = [{ Performs an inclusive/exclusive scan on an n-D vector along a single dimension returning an n-D result vector using the given - operation (add/mul/min/max for int/fp and and/or/xor for - int only) and a specified value for the initial value. The operator - returns the result of scan as well as the result of the last - reduction in the scan. + operation (`add`/`mul`/`minsi`/`minui`/`maxsi`/`maxui`/`and`/`or`/`xor` for + integers, and `add`/`mul`/`minf`/`maxf`/`minimumf`/`maximumf` for floats), + and a specified value for the initial value. The operator returns the + result of scan as well as the result of the last reduction in the scan. Example: From 9e390a140857355b3524924075a261b9d06ae850 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 30 Oct 2023 14:58:09 -0500 Subject: [PATCH 072/144] [libc][Obvious] Fix missing semicolon in AMDGPU loader implementation Summary: Title --- libc/utils/gpu/loader/amdgpu/Loader.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp index c2a11fd8aab72b..2f99076a720e2a 100644 --- a/libc/utils/gpu/loader/amdgpu/Loader.cpp +++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp @@ -248,9 +248,8 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable, (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) | (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) | (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE); - uint32_t header_word = - header | (setup << 16u) __atomic_store_n((uint32_t *)&packet->header, - header_word, __ATOMIC_RELEASE); + uint32_t header_word = header | (setup << 16u); + __atomic_store_n((uint32_t *)&packet->header, header_word, __ATOMIC_RELEASE); hsa_signal_store_relaxed(queue->doorbell_signal, packet_id); // Wait until the kernel has completed execution on the device. Periodically From 0d5b7dd25cc47123d6920207c089c8a9b98571b4 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Mon, 30 Oct 2023 15:59:05 -0400 Subject: [PATCH 073/144] [OpenMP] Add a test for D158802 (#70678) In D158802 we honored user's `thread_limit` value even with the optimization introduced in D152014. This patch adds a simple test. 
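In essence, the added test checks a kernel of this shape (a sketch only; the
buffer name and constants mirror the test below):

  // An explicit 256-thread limit with a 1024-iteration loop: the runtime
  // must launch 256-thread blocks as requested rather than letting the
  // small-trip-count heuristic pick a different block size.
  #pragma omp target teams distribute parallel for thread_limit(256)
  for (int i = 0; i < 1024; ++i)
    data[i] = i;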
--- .../small_trip_count_thread_limit.cpp | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 openmp/libomptarget/test/offloading/small_trip_count_thread_limit.cpp diff --git a/openmp/libomptarget/test/offloading/small_trip_count_thread_limit.cpp b/openmp/libomptarget/test/offloading/small_trip_count_thread_limit.cpp new file mode 100644 index 00000000000000..9796c2dc11663b --- /dev/null +++ b/openmp/libomptarget/test/offloading/small_trip_count_thread_limit.cpp @@ -0,0 +1,31 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic +// RUN: env LIBOMPTARGET_INFO=16 \ +// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic + +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +int main(int argc, char *argv[]) { + constexpr const int block_size = 256; + constexpr const int grid_size = 4; + constexpr const int count = block_size * grid_size; + + int *data = new int[count]; + +#pragma omp target teams distribute parallel for thread_limit(block_size) map(from: data[0:count]) + for (int i = 0; i < count; ++i) + data[i] = i; + + for (int i = 0; i < count; ++i) + if (data[i] != i) + return 1; + + delete[] data; + + return 0; +} + +// CHECK: Launching kernel {{.*}} with 4 blocks and 256 threads in SPMD mode From 68c384676cf92289d823576f915ed296d209354f Mon Sep 17 00:00:00 2001 From: Andrew Gozillon Date: Mon, 30 Oct 2023 14:51:47 -0500 Subject: [PATCH 074/144] [Flang][MLIR][OpenMP] Temporarily re-add basic handling of uses in target regions to avoid gfortran test-suite regressions This was a regression introduced by myself in: https://github.com/llvm/llvm-project/commit/6a62707c048e16ce9bad37ed8e3520799139436b where I too hastily removed the basic handling of implicit captures we have currently. This will be superseded by all implicit captures being added to target operations map_info entries in a soon landing series of patches, however, that is currently not the case so we must continue to do some basic handling of these captures for the time being. This patch re-adds that behaviour to avoid regressions. Unfortunately this means some test changes as well as getUsedValuesDefinedAbove grabs constants used outside of the target region which aren't handled particularly well currently. --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 18 ++++++++++++++++ .../basic-target-region-1D-array-section.f90 | 7 ++++--- .../basic-target-region-3D-array-section.f90 | 12 ++++++++--- .../fortran/basic-target-region-3D-array.f90 | 21 +++++++++++++------ 4 files changed, 46 insertions(+), 12 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 6a515c2ba4e87e..1daf60b8659bb6 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -32,6 +32,7 @@ #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include #include #include @@ -2407,6 +2408,23 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, kernelInput.push_back(mapData.OriginalValue[i]); } + // Do some very basic handling of implicit captures that are caught + // by use in the target region. 
+ // TODO/FIXME: Remove on addition of IsolatedFromAbove patch series + // as this will become redundant and perhaps erroneous in cases + // where more complex implicit capture semantics are required. + llvm::SetVector uses; + getUsedValuesDefinedAbove(targetRegion, uses); + + for (mlir::Value use : uses) { + llvm::Value *useValue = moduleTranslation.lookupValue(use); + if (useValue && + !std::any_of( + mapData.OriginalValue.begin(), mapData.OriginalValue.end(), + [&](llvm::Value *mapValue) { return mapValue == useValue; })) + kernelInput.push_back(useValue); + } + builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget( ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB)); diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90 index 11d3b6936bcea2..58f5379e330ec0 100644 --- a/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90 +++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90 @@ -14,10 +14,11 @@ program main integer :: write_arr(10) = (/0,0,0,0,0,0,0,0,0,0/) integer :: read_arr(10) = (/1,2,3,4,5,6,7,8,9,10/) integer :: i = 2 - - !$omp target map(to:read_arr(2:5)) map(from:write_arr(2:5)) map(tofrom:i) - do i = 2, 5 + integer :: j = 5 + !$omp target map(to:read_arr(2:5)) map(from:write_arr(2:5)) map(to:i,j) + do while (i <= j) write_arr(i) = read_arr(i) + i = i + 1 end do !$omp end target diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90 index 28b2afced4d1bc..e3df7983e6b5c1 100644 --- a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90 +++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90 @@ -14,6 +14,7 @@ program main integer :: inArray(3,3,3) integer :: outArray(3,3,3) integer :: i, j, k + integer :: j2 = 3, k2 = 3 do i = 1, 3 do j = 1, 3 @@ -24,11 +25,16 @@ program main end do end do -!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3), j, k) - do j = 1, 3 - do k = 1, 3 +j = 1 +k = 1 +!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3), j, k, j2, k2) + do while (j <= j2) + k = 1 + do while (k <= k2) outArray(k, j, 2) = inArray(k, j, 2) + k = k + 1 end do + j = j + 1 end do !$omp end target diff --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90 index 58f42138ad0aff..44ff394dcda16b 100644 --- a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90 +++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90 @@ -10,9 +10,9 @@ ! 
RUN: %libomptarget-compile-fortran-run-and-check-generic program main - implicit none integer :: x(2,2,2) - integer :: i = 1, j = 1, k = 1 + integer :: i, j, k + integer :: i2 = 2, j2 = 2, k2 = 2 integer :: counter = 1 do i = 1, 2 do j = 1, 2 @@ -22,14 +22,23 @@ program main end do end do -!$omp target map(tofrom:x, i, j, k, counter) - do i = 1, 2 - do j = 1, 2 - do k = 1, 2 +i = 1 +j = 1 +k = 1 + +!$omp target map(tofrom:x, counter) map(to: i, j, k, i2, j2, k2) + do while (i <= i2) + j = 1 + do while (j <= j2) + k = 1 + do while (k <= k2) x(i, j, k) = counter counter = counter + 1 + k = k + 1 end do + j = j + 1 end do + i = i + 1 end do !$omp end target From cc6f9cf5a2d5c246f30b74f748ee9a355e49d22a Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 30 Oct 2023 13:11:10 -0700 Subject: [PATCH 075/144] [RISCV] Add zbb coverage to test file [nfc] --- llvm/test/CodeGen/RISCV/sext-zext-trunc.ll | 285 ++++++++++++--------- 1 file changed, 161 insertions(+), 124 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll index 6be6785fc1d0ee..98488c9a589a3a 100644 --- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll +++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll @@ -2,7 +2,9 @@ ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: | FileCheck %s -check-prefixes=RV64,RV64I +; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64,RV64ZBB define i8 @sext_i1_to_i8(i1 %a) nounwind { ; RV32I-LABEL: sext_i1_to_i8: @@ -11,11 +13,11 @@ define i8 @sext_i1_to_i8(i1 %a) nounwind { ; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: ret ; -; RV64I-LABEL: sext_i1_to_i8: -; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 63 -; RV64I-NEXT: srai a0, a0, 63 -; RV64I-NEXT: ret +; RV64-LABEL: sext_i1_to_i8: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %1 = sext i1 %a to i8 ret i8 %1 } @@ -27,11 +29,11 @@ define i16 @sext_i1_to_i16(i1 %a) nounwind { ; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: ret ; -; RV64I-LABEL: sext_i1_to_i16: -; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 63 -; RV64I-NEXT: srai a0, a0, 63 -; RV64I-NEXT: ret +; RV64-LABEL: sext_i1_to_i16: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %1 = sext i1 %a to i16 ret i16 %1 } @@ -43,11 +45,11 @@ define i32 @sext_i1_to_i32(i1 %a) nounwind { ; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: ret ; -; RV64I-LABEL: sext_i1_to_i32: -; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 63 -; RV64I-NEXT: srai a0, a0, 63 -; RV64I-NEXT: ret +; RV64-LABEL: sext_i1_to_i32: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %1 = sext i1 %a to i32 ret i32 %1 } @@ -60,11 +62,11 @@ define i64 @sext_i1_to_i64(i1 %a) nounwind { ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: ret ; -; RV64I-LABEL: sext_i1_to_i64: -; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 63 -; RV64I-NEXT: srai a0, a0, 63 -; RV64I-NEXT: ret +; RV64-LABEL: sext_i1_to_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %1 = sext i1 %a to i64 ret i64 %1 } @@ -81,6 +83,11 @@ define i16 @sext_i8_to_i16(i8 %a) nounwind { ; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: sext_i8_to_i16: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: sext.b a0, a0 +; 
RV64ZBB-NEXT: ret %1 = sext i8 %a to i16 ret i16 %1 } @@ -97,6 +104,11 @@ define i32 @sext_i8_to_i32(i8 %a) nounwind { ; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: sext_i8_to_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: sext.b a0, a0 +; RV64ZBB-NEXT: ret %1 = sext i8 %a to i32 ret i32 %1 } @@ -114,6 +126,11 @@ define i64 @sext_i8_to_i64(i8 %a) nounwind { ; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: sext_i8_to_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: sext.b a0, a0 +; RV64ZBB-NEXT: ret %1 = sext i8 %a to i64 ret i64 %1 } @@ -130,6 +147,11 @@ define i32 @sext_i16_to_i32(i16 %a) nounwind { ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: sext_i16_to_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: sext.h a0, a0 +; RV64ZBB-NEXT: ret %1 = sext i16 %a to i32 ret i32 %1 } @@ -147,6 +169,11 @@ define i64 @sext_i16_to_i64(i16 %a) nounwind { ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: sext_i16_to_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: sext.h a0, a0 +; RV64ZBB-NEXT: ret %1 = sext i16 %a to i64 ret i64 %1 } @@ -157,10 +184,10 @@ define i64 @sext_i32_to_i64(i32 %a) nounwind { ; RV32I-NEXT: srai a1, a0, 31 ; RV32I-NEXT: ret ; -; RV64I-LABEL: sext_i32_to_i64: -; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: ret +; RV64-LABEL: sext_i32_to_i64: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: ret %1 = sext i32 %a to i64 ret i64 %1 } @@ -171,10 +198,10 @@ define i8 @zext_i1_to_i8(i1 %a) nounwind { ; RV32I-NEXT: andi a0, a0, 1 ; RV32I-NEXT: ret ; -; RV64I-LABEL: zext_i1_to_i8: -; RV64I: # %bb.0: -; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: ret +; RV64-LABEL: zext_i1_to_i8: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: ret %1 = zext i1 %a to i8 ret i8 %1 } @@ -185,10 +212,10 @@ define i16 @zext_i1_to_i16(i1 %a) nounwind { ; RV32I-NEXT: andi a0, a0, 1 ; RV32I-NEXT: ret ; -; RV64I-LABEL: zext_i1_to_i16: -; RV64I: # %bb.0: -; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: ret +; RV64-LABEL: zext_i1_to_i16: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: ret %1 = zext i1 %a to i16 ret i16 %1 } @@ -199,10 +226,10 @@ define i32 @zext_i1_to_i32(i1 %a) nounwind { ; RV32I-NEXT: andi a0, a0, 1 ; RV32I-NEXT: ret ; -; RV64I-LABEL: zext_i1_to_i32: -; RV64I: # %bb.0: -; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: ret +; RV64-LABEL: zext_i1_to_i32: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: ret %1 = zext i1 %a to i32 ret i32 %1 } @@ -214,10 +241,10 @@ define i64 @zext_i1_to_i64(i1 %a) nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: ret ; -; RV64I-LABEL: zext_i1_to_i64: -; RV64I: # %bb.0: -; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: ret +; RV64-LABEL: zext_i1_to_i64: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: ret %1 = zext i1 %a to i64 ret i64 %1 } @@ -228,10 +255,10 @@ define i16 @zext_i8_to_i16(i8 %a) nounwind { ; RV32I-NEXT: andi a0, a0, 255 ; RV32I-NEXT: ret ; -; RV64I-LABEL: zext_i8_to_i16: -; RV64I: # %bb.0: -; RV64I-NEXT: andi a0, a0, 255 -; RV64I-NEXT: ret +; RV64-LABEL: zext_i8_to_i16: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 255 +; RV64-NEXT: ret %1 = zext i8 %a to i16 ret i16 %1 } @@ -242,10 +269,10 @@ define i32 @zext_i8_to_i32(i8 %a) nounwind { ; RV32I-NEXT: andi a0, a0, 255 ; RV32I-NEXT: ret ; -; RV64I-LABEL: zext_i8_to_i32: -; RV64I: # %bb.0: -; RV64I-NEXT: andi a0, a0, 255 -; RV64I-NEXT: ret +; RV64-LABEL: 
zext_i8_to_i32: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 255 +; RV64-NEXT: ret %1 = zext i8 %a to i32 ret i32 %1 } @@ -257,10 +284,10 @@ define i64 @zext_i8_to_i64(i8 %a) nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: ret ; -; RV64I-LABEL: zext_i8_to_i64: -; RV64I: # %bb.0: -; RV64I-NEXT: andi a0, a0, 255 -; RV64I-NEXT: ret +; RV64-LABEL: zext_i8_to_i64: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 255 +; RV64-NEXT: ret %1 = zext i8 %a to i64 ret i64 %1 } @@ -277,6 +304,11 @@ define i32 @zext_i16_to_i32(i16 %a) nounwind { ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: zext_i16_to_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: ret %1 = zext i16 %a to i32 ret i32 %1 } @@ -294,6 +326,11 @@ define i64 @zext_i16_to_i64(i16 %a) nounwind { ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: zext_i16_to_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: ret %1 = zext i16 %a to i64 ret i64 %1 } @@ -304,11 +341,11 @@ define i64 @zext_i32_to_i64(i32 %a) nounwind { ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: ret ; -; RV64I-LABEL: zext_i32_to_i64: -; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 -; RV64I-NEXT: ret +; RV64-LABEL: zext_i32_to_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: ret %1 = zext i32 %a to i64 ret i64 %1 } @@ -318,9 +355,9 @@ define i1 @trunc_i8_to_i1(i8 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: ret ; -; RV64I-LABEL: trunc_i8_to_i1: -; RV64I: # %bb.0: -; RV64I-NEXT: ret +; RV64-LABEL: trunc_i8_to_i1: +; RV64: # %bb.0: +; RV64-NEXT: ret %1 = trunc i8 %a to i1 ret i1 %1 } @@ -330,9 +367,9 @@ define i1 @trunc_i16_to_i1(i16 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: ret ; -; RV64I-LABEL: trunc_i16_to_i1: -; RV64I: # %bb.0: -; RV64I-NEXT: ret +; RV64-LABEL: trunc_i16_to_i1: +; RV64: # %bb.0: +; RV64-NEXT: ret %1 = trunc i16 %a to i1 ret i1 %1 } @@ -342,9 +379,9 @@ define i1 @trunc_i32_to_i1(i32 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: ret ; -; RV64I-LABEL: trunc_i32_to_i1: -; RV64I: # %bb.0: -; RV64I-NEXT: ret +; RV64-LABEL: trunc_i32_to_i1: +; RV64: # %bb.0: +; RV64-NEXT: ret %1 = trunc i32 %a to i1 ret i1 %1 } @@ -354,9 +391,9 @@ define i1 @trunc_i64_to_i1(i64 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: ret ; -; RV64I-LABEL: trunc_i64_to_i1: -; RV64I: # %bb.0: -; RV64I-NEXT: ret +; RV64-LABEL: trunc_i64_to_i1: +; RV64: # %bb.0: +; RV64-NEXT: ret %1 = trunc i64 %a to i1 ret i1 %1 } @@ -366,9 +403,9 @@ define i8 @trunc_i16_to_i8(i16 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: ret ; -; RV64I-LABEL: trunc_i16_to_i8: -; RV64I: # %bb.0: -; RV64I-NEXT: ret +; RV64-LABEL: trunc_i16_to_i8: +; RV64: # %bb.0: +; RV64-NEXT: ret %1 = trunc i16 %a to i8 ret i8 %1 } @@ -378,9 +415,9 @@ define i8 @trunc_i32_to_i8(i32 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: ret ; -; RV64I-LABEL: trunc_i32_to_i8: -; RV64I: # %bb.0: -; RV64I-NEXT: ret +; RV64-LABEL: trunc_i32_to_i8: +; RV64: # %bb.0: +; RV64-NEXT: ret %1 = trunc i32 %a to i8 ret i8 %1 } @@ -390,9 +427,9 @@ define i8 @trunc_i64_to_i8(i64 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: ret ; -; RV64I-LABEL: trunc_i64_to_i8: -; RV64I: # %bb.0: -; RV64I-NEXT: ret +; RV64-LABEL: trunc_i64_to_i8: +; RV64: # %bb.0: +; RV64-NEXT: ret %1 = trunc i64 %a to i8 ret i8 %1 } @@ -402,9 +439,9 @@ define i16 @trunc_i32_to_i16(i32 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: ret ; -; RV64I-LABEL: trunc_i32_to_i16: -; 
RV64I: # %bb.0: -; RV64I-NEXT: ret +; RV64-LABEL: trunc_i32_to_i16: +; RV64: # %bb.0: +; RV64-NEXT: ret %1 = trunc i32 %a to i16 ret i16 %1 } @@ -414,9 +451,9 @@ define i16 @trunc_i64_to_i16(i64 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: ret ; -; RV64I-LABEL: trunc_i64_to_i16: -; RV64I: # %bb.0: -; RV64I-NEXT: ret +; RV64-LABEL: trunc_i64_to_i16: +; RV64: # %bb.0: +; RV64-NEXT: ret %1 = trunc i64 %a to i16 ret i16 %1 } @@ -426,9 +463,9 @@ define i32 @trunc_i64_to_i32(i64 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: ret ; -; RV64I-LABEL: trunc_i64_to_i32: -; RV64I: # %bb.0: -; RV64I-NEXT: ret +; RV64-LABEL: trunc_i64_to_i32: +; RV64: # %bb.0: +; RV64-NEXT: ret %1 = trunc i64 %a to i32 ret i32 %1 } @@ -441,11 +478,11 @@ define i32 @sext_of_not_i32(i1 %x) { ; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: ret ; -; RV64I-LABEL: sext_of_not_i32: -; RV64I: # %bb.0: -; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: ret +; RV64-LABEL: sext_of_not_i32: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: ret %xor = xor i1 %x, 1 %sext = sext i1 %xor to i32 ret i32 %sext @@ -459,11 +496,11 @@ define i64 @sext_of_not_i64(i1 %x) { ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: ret ; -; RV64I-LABEL: sext_of_not_i64: -; RV64I: # %bb.0: -; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: ret +; RV64-LABEL: sext_of_not_i64: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: ret %xor = xor i1 %x, 1 %sext = sext i1 %xor to i64 ret i64 %sext @@ -478,13 +515,13 @@ define i32 @sext_of_not_cmp_i32(i32 %x) { ; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: ret ; -; RV64I-LABEL: sext_of_not_cmp_i32: -; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: addi a0, a0, -7 -; RV64I-NEXT: seqz a0, a0 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: ret +; RV64-LABEL: sext_of_not_cmp_i32: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: ret %cmp = icmp eq i32 %x, 7 %xor = xor i1 %cmp, 1 %sext = sext i1 %xor to i32 @@ -501,12 +538,12 @@ define i64 @sext_of_not_cmp_i64(i64 %x) { ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: ret ; -; RV64I-LABEL: sext_of_not_cmp_i64: -; RV64I: # %bb.0: -; RV64I-NEXT: addi a0, a0, -7 -; RV64I-NEXT: seqz a0, a0 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: ret +; RV64-LABEL: sext_of_not_cmp_i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: ret %cmp = icmp eq i64 %x, 7 %xor = xor i1 %cmp, 1 %sext = sext i1 %xor to i64 @@ -522,13 +559,13 @@ define i32 @dec_of_zexted_cmp_i32(i32 %x) { ; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: ret ; -; RV64I-LABEL: dec_of_zexted_cmp_i32: -; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: addi a0, a0, -7 -; RV64I-NEXT: seqz a0, a0 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: ret +; RV64-LABEL: dec_of_zexted_cmp_i32: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: ret %cmp = icmp eq i32 %x, 7 %zext = zext i1 %cmp to i32 %dec = sub i32 %zext, 1 @@ -545,12 +582,12 @@ define i64 @dec_of_zexted_cmp_i64(i64 %x) { ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: ret ; -; RV64I-LABEL: dec_of_zexted_cmp_i64: -; RV64I: # %bb.0: -; RV64I-NEXT: addi a0, a0, -7 -; RV64I-NEXT: seqz a0, a0 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: ret +; RV64-LABEL: dec_of_zexted_cmp_i64: +; RV64: # %bb.0: +; 
RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: ret %cmp = icmp eq i64 %x, 7 %zext = zext i1 %cmp to i64 %dec = sub i64 %zext, 1 From 2446439f51cf0d2dfb11c823436a930de7a4b8a2 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Mon, 30 Oct 2023 13:19:37 -0700 Subject: [PATCH 076/144] [MemProf] Handle profiles with missing column numbers (#70520) Detect when we are matching a memprof profile with no column numbers, and in that case treat all column numbers as 0 when matching. The profiled binary might have been built with -gno-column-info, for example. --- .../Instrumentation/MemProfiler.cpp | 12 ++++- .../PGOProfile/Inputs/memprof.nocolinfo.exe | Bin 0 -> 1520760 bytes .../Inputs/memprof.nocolinfo.memprofraw | Bin 0 -> 2352 bytes .../Inputs/update_memprof_inputs.sh | 4 ++ llvm/test/Transforms/PGOProfile/memprof.ll | 46 ++++++++++++++++++ 5 files changed, 60 insertions(+), 2 deletions(-) create mode 100755 llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe create mode 100644 llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.memprofraw diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 8cd06f878897b3..2b29ea2a65fdc8 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -729,6 +729,12 @@ static void readMemprof(Module &M, Function &F, return; } + // Detect if there are non-zero column numbers in the profile. If not, + // treat all column numbers as 0 when matching (i.e. ignore any non-zero + // columns in the IR). The profiled binary might have been built with + // column numbers disabled, for example. + bool ProfileHasColumns = false; + // Build maps of the location hash to all profile data with that leaf location // (allocation info and the callsites). std::map> LocHashToAllocInfo; @@ -742,6 +748,7 @@ static void readMemprof(Module &M, Function &F, // of call stack frames. uint64_t StackId = computeStackId(AI.CallStack[0]); LocHashToAllocInfo[StackId].insert(&AI); + ProfileHasColumns |= AI.CallStack[0].Column; } for (auto &CS : MemProfRec->CallSites) { // Need to record all frames from leaf up to and including this function, @@ -750,6 +757,7 @@ static void readMemprof(Module &M, Function &F, for (auto &StackFrame : CS) { uint64_t StackId = computeStackId(StackFrame); LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++)); + ProfileHasColumns |= StackFrame.Column; // Once we find this function, we can stop recording. if (StackFrame.Function == FuncGUID) break; @@ -798,8 +806,8 @@ static void readMemprof(Module &M, Function &F, if (Name.empty()) Name = DIL->getScope()->getSubprogram()->getName(); auto CalleeGUID = Function::getGUID(Name); - auto StackId = - computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn()); + auto StackId = computeStackId(CalleeGUID, GetOffset(DIL), + ProfileHasColumns ? DIL->getColumn() : 0); // LeafFound will only be false on the first iteration, since we either // set it true or break out of the loop below. 
if (!LeafFound) { diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe new file mode 100755 index 0000000000000000000000000000000000000000..3000e2b8515a25c02dba016e75107b2bf80b3b85 GIT binary patch literal 1520760 zcmeFad)!Rb`#-$zx#wowPTL_lrEw^S!VpoY!R=TyBsr9Zq?9z2qEg%Wc1YtasRpT( z7?o3z2^CUCrJ~djg-+9v4xf6iwcpqNJ+FB`c|FhnPwVsj(*1Vb?{%$fUF%xcT5IpM zAMSK@XJ1~Pf-mjzk`h+pY6T?|DpAjl%Gud z{!e{>K`^d=L)fyq>VJ54G3GF;*mb#wk+9dUoOyuVEyru35EQ}x2y{PqR-RS z^Zu3R+2Ft9ubu^e?`(LTf9H9+Zk|>9U-UUCc<^8F2KD|Oj|l#6b{KFLeCS#5d(VPT zIt#w=Ecgp&!CyNI{^nWmkIsT07X07rcH%7ZKb-~t{VaI?1^;f(QfI--p9QaS7QEJ3 z@VaNg+noh(+%V5FV~I;!!GjFS&o`X~pLP~}`C0I-XTcAi1^@Ldc;!a_Zs!Znf^Wa* z-{tqqBv;WYXVsX1dDxrt+RKDkNAS#-z=x_$Vq!ES-(XU%{PmYWzW?N0yxFb5!xJua z8M-OYMG~+Kf6Xbm@*N&hp6R*x=RV-snZVP^^PtQdhR3=V{CyNZoP?f{1aO@-t#gk8 z54{IGPvw79eJ&z>M!W`jYjdvri#vd4w&dd3w}Csa=i*yNK~H-j@a9@?S($i@Jh?^5}y>wu@10UxG#3pY-dvtoBOaFH(*pLQwaWX))4}2o^`>~e5rxJg!8-bUYmfNl-Dd4f!!0*}rqA!6bsbBU-p=W3e#I%R&u+}+dGuPyr}_c^L*;8v2ELcZ-T40Cd6U{z`vJ(;A^DLH0>6~- zCG)@&{2O|T8@pv^6}9s>xq3d-74px~cw^f_+? zc*>KXyyAc-{xW#t%Cq1Mp^eW(y zv8Y#uxmzZQ;~AdV6W|}N`2DgRGCU8{Jo1QyW+VS0;m;q0d}0^)y*SY8Yv8f{z`b?Z zB?aJd7NFjjwceO0X80$NJ#TFVo~Hl7==iEn{nBoh>azv3?Socw{c*z47>r z1Sn%4&MVexd=7Wb<^N~}xSg3&elV&gToik^hP6{q`f^sW-vn`S}6qIKvZp13VR! zzltbe@ae=;?snkc(meA1bnu6tMZZh)-IDnYc<3PNt)%#-$5fw&z^_z==BxqE`ANvH z@%c!u{%h3VtWQy|h`D8t13hPw|CIOz{OON#dH#_6+_X0qZsh*HXP@>3LA% zo8do5^=6Az&jjrAmh!(Oj%VZ(tAR`Sb<5brz>{^se^~LKRx3UO_=k!IHUW>*{Jyv% z`YU`gSI>9Kqg~&To-cF&9_|2nZ(L8h8hGleTzt)3)Ehhv{U26+Zao+ECTSiYSQ0$W z7W~!=;J;t_$4`MguNNLW z0X+16F8`s^z*9P2y#Dp;{2rqD{oPvd+dnA|w2%qf_;d6W^y#enyxbA^y%cY|NyCi% zO3FiG8YiQlgQtrs)I{^rkmjYHo|gonzeV%=opr(AituwL0FTo+J@O5BG9TpHCox#% z?*s1T1IrFU-r1ci|C!q1PnwUO3W4Whntx9Y1s;A7JXy6{6`7Y!zkf#d*{bn59EJQp zD!>0n)&HYheP;X)Jf`{ExgLKt_-!Z6=L-~fDng&oYp}mJ&)#@5@Ovnpj1az#-c|`Fqo&ryj=JA)e0T0u<@AE3aqvR)(ngj1n zesVzL)d(6#n{@vDi{{PJ8t1R0e(@axzr7E3_TtY$t@j$Lcj_f*uM>p4n9D6&r-LW; z4D2vo@n@C-kF|q*PsN?*fjbWY7dLRrX`Np(G`}<}g?g>0z%RqrEkVsEFC{&HZw3BU zgm1qFJds|gcbf7%B@0jEx9Pip%k<=y)2e^+3gFUAx7_?H_~V;^=PCZ1<}=yd@DmR| zQ6F(K-oBA5>EYLk!x;Yz?M1s}I>W!80iNbKtUMO+eAoi|$Lm2KkEcg_@LWrNGHn3l z50aleqj}vhZTT^>?3MP)0{O_?&^;{0Ty4L&aR^axl zz&-ub7eODV6z~-)e67WrwcU4-c{6oMqQ+eKbLvh+??zangg7zPZHII$c zy6>MqRi5(d4UM5sxIF5WZ3nlUk^yM?#eNL9Orvh;_LB0@`e%(f`5SW-}o{3t+#;RuK3R%?5CpR=5f z-{j_8eP(O_Rv~?AIH>nQ>X#;(Kc{sb^xE}h4ag_S{s-#=pGEdxz5uvg2KspOT3Git zLb|_kzP2}57WT|&yz=lPJz=;1kw2$(e4R)7SNXbQTaVRz?0|!c}bt#vUV2avnj~S@rPSxJfb}B0+%$vElVZA zGyG}V_xP+O`1u^FyNpLOPCp_)fA9|A1vK9K-Ua^7gjdvl32MK1{l596>OA3c=$X2anh9O|-oaQoA;)U&Yn0Mk&wMlHdvI zIfAFpV-eIFr1(5^JnR#D0rielo)b@~d`IA7Mz@T826$pPa7icJQl$BVrTN3xieGRF z@|Ny1R8V~3ufSuUfXCBwgP6tiW9ARoXP(NhyAybPCgeT-!9Rj0N&Z|c>rKNmkj82I z+K{hI&_zFJ8RIr0gh*HYCe_Gb74Z;6Bwd;>Az*kV*c;ar@$6k!~-mE+^^-o*( zA9^dEodX`{2lUG=imzA?JiZ~fALmqs{?>26J8J{xy3Z|0zHt)z%XW=hhReKf`YU!4 z{2di9BZf8jSJbY$Jy7o#)UMN-hb$&Pw?$zSPv%nISX=zT@F%}Rz0I}WPjtLoKs|~fp7GW&j*i8({9^!lzZBPf+Dh8q1tMVd4>pGV3lyI(1H|Ab zC=OQ*!47pOu0C`n@RsCPC;I_^jru+K3HT##!~UM0BYfzu&=5D)$`b8 zz@280mu*kC3|4#E(|~*Nd7O-2(=OhJw8W7NzKiVNsQ~(<=zQn2j;}-o@bA!iR~LcD zAv-JxfG4K&i`QSJ>p=gyG*0_n3Y_EWnL)to(RiGA6YwU4ch`C&G_URK4Eb1R=r79| zx0HxO|KwM&vo|ij-U2*9`Qak9e^$?-Bn-Nxn&!{P$xjL;V4884)^l?abIUvj{8MQh zZGH`Skm@~G`!_um`nOb`7aszDXbo^L4qyKjc+zR$p8w}nguJsE?Ui)PEmseK{DQ?r13{XSX-psNq9 zQwALc9--%#R>#0&&jXL=Z@)_eUH%u*FZ;E-vcfH;IzgW)G_D(u1)e2+&e3@!*aYp8$9UYbROheg{lISpY#o=x z%(N?|`;cCISdkC@$ZWLhdgWQ8^Jzl&Lu5L0%T$SD#%}MEJ|lHpbDl78KI%>6Lmvq* zZs~eP?Md@UYf;1S4ztz(G!EOtFaaXrI 
z_x_m3n+22oG57qkpvj9t7x|+l?=;T6dp6edtbTjJW$@_wSApsJ*Mo`v?zqwdrrV?2 zmglA$1k>$13?}w<(~X1a{WSxo_t(6SywIPP_g6lcu3s^j-rhKv-rj03y}ug3^!{oE z)BCFzO!~|1C!4^AG;9QH3z$0(pYV}4>mzT$M_y#YlIO0s0Bo~XZwzdchE;$KYFG`} z0GQkF8hzxo`N-?@k+<2F=hkm4Sie^9Bv_w@&4Kl5*dkcBhGqYmd{4uoU>zD(0@kKs zCRi(&yFY4u#*gy{kb1Z*KdoDyip%{Q$F&NKJu1)`K#N%3c$p_+;lOpW&RJg%>RAlHQMsrb+m!$`t^b7{<;~=-CpT~tzgn#zdo4u zq2F@=`pBeLzQ`;96Zvv|!0LlWPal*~zL|1&d#b>6`_zN!_Gtl=_~7R42Ghs$Aee5S zVKA|eGhS5wZ_D$u#T;dN{fjoeyZ-FEm)9Q!llp%wHsHwRQv&AB-(p}UnB;G!5P9xF z%Y4nF6&`o~DR%A!t3b}>wjB2}6!I$seivdyr*ZgNwH#{(OkZ!#gX!zJ&^^o78~I?3 zn!IAL1`UgY>Fa}PFj*hCZQB6m&gZ3{TEQfre~YwLjq#6o{A+-6cYR{#Auy@$OyTj^ z+4tEL)n9Y)v}yV*g6a0kzIVC3qF}ncO2C>lc_x@{uUas-{?boPV7k3Jz})r{f9?kp zd-<&ohc$ZD|2Dnb<}+YoUpF=nR;^*7Derno>dyyDK<77}N_^-|o8C{~dLR0h1JL*R z&~H8f{g_5Cell&-AI^Bp{;RjYL{1b;`s+N-z5d3p65z9bd2uy7ZhsRw4PfGLha+bI zo_<$fNxuw%^?~`>dBTT&_5k!tKJ>ZwWtLw|nXXU#0Q9v!^i2n#@Ajb|JOKSxANt7y z(C_h~H}22e-U7;`y~j)6bBO(8V4E`7KS9}MSAWU4th4FeW3U-)P^0Sv8_=);Ft@!$ z-Vm7dzgylISg(dngLP}z9 zkC)wGdb}J2)8pkZm^)sIe&b*gFWq*T0jtrlc`%88C%_|z@P-~(E-xQUmsbp?%Zr1F zJa@g-U{xB{04DL&P1g!$YFIB=g@$bci)+{jSeb@RfR$?4ELaKHv8uIJdl!7D5`&m)2kVaPmW@wlRwxs2=wP1@H)&#boVI5%e8rBcCN5i&&C0$rPYK(%- zfxS!mLtQWbI60K{OvMak5tp7tkj#Tgd&-6Wob)q!Uk;>xHe>B})<-ff3TBq~R}4)0 z>uBNU5V{I5cYjIFPy;6Y6%!(r+ckJ{yB5mb{vo<`gNc9S${>N~#=u#}9C5z$mV?0m zWUq*nXCIMk{r(5E^1Q`aBHxn-rsid(+bneQyiZu@I7HtCFnQkRNQGEfxFeEQ)p zyF6gjacxZ>cnqT3R*i_rdGbc;WVJsJ*PUzGY9p&fztts%bP-@!=8 z854XX)&loJGu9X|F8dr`ET2o+H+Z)7E3Q_~nYt5Cny%iJF#&J#$$;?`tRg?XcI1g$ z-tMP{i99yVuRW&DFL9mo2Cfu-uIoJYo5@8qD*|$JGN7M(JBUPg|6iwboGdsgRZwDX#7DGv+SY<9YZfx zzOqz%r*QD8kpIKOO@Tdr1#4F}AvPhO$FP5Q(D-DKFBr+~>k`GRdme&|0q>yaUH`mp z!yN{AWrW1`0pyL-pOFE+h`-$1qVBoERg4kJ_IxF1Tp{C%L)tk3X8u>u_}~AJ?X04n zlE=wkzZEn-bl`R-52c-=yR@ehnOg>eo;W=K_FWAd0{gOtje&hp!=}NSTv!Qp>;Y>4 ztC32z@xiZgS#C`~q+A~|iFt*8!++qrTKKGfO^wMwg<`aZB*jusHT1L4U*Sh@I`pZE z;I(3SH+=cu4O+i{skC8*soq7*uVyHHnOov>-yF$Zi!7kRYCNNXOI>6RwSU=dC zHEavmYcy;WEL+2-z+R^B!ggDv9Z9ffG;9g%Q4Py|f@@(KRtUCD!%D&aN5c|eKXPF* zcIv?Tz^=4?P33e6>&32t%k5+&@Ej-BSnGv;iuN1|eGK`d(C7UiXuQEk{$-B%Cpl4> zBl#RW3-HWt38vmJ=e(DKcYc4^8C(oJHNK)xGJhcdRJZ(8PWmyYhCNOVBEK4*=r4lC zw>bCCU%b@N9=wkHHfYCw7&NvYP=0WYlfTJ4GYn51@?Rr78Rwb6FTIlr(+qE((WV&{ z-$F@zeN)gl*R6x=-6g?nr*>i?RhB>>`*F~Ck#mV3TAuy{wor7n&_#X{G>&r1P1!E6 z*P-ixZXUV?e|e7yT>)iVpd0^b&^XCWcV>I=VhCiuoS=M~@>kjA?YzQ0!nyK)%4R4F zN*PBHpCy}qp8!rfTT=cG28JeB{Qr1A( zJM1!f+kxcy!q-AsH)Zd%%jC&ZDU0&GkFp`k*4bs&o%R(rh2_s9(9A${uF!BuT$ljM z-x@SZ?D3Y$KOGwmX1QNT6nMxloh@HEDD0(t3idF5ZwVTI;rsw`<`!>M!51K}fp-92 zwj2EVv)udc>cD1yRpP2$n~)W|HE1+sYFC@5UBNeG`AchqU+lDiysF!R#x{RnJ7?vn zS6*ktU*0J4WGq)a&78S2n0i0FU^QSfE=B%iFv*?d5G(=K52k$D#_GVvz?@u5=$gSM z!Bm{K={mt?G`az>q=pTFEoi)BV8%qc4$)~EEF0|goXfbpoEEC~gBOB-%EoyQqjOt< zvSIGC=tsHAL*Y|#-fRyB>a7?r`j9fqh?y zXpa^*^s!pE(&SBAXReH2xYFcxWR)vRIrn{GI^rZ>s7Jo>mtgAsY@WIEuC!gdp&Nnj zwUEmp`VNAPxiA?=!(fwO`QniJI65jTHI9UTR`@4_2E$z)b^KibYnlog*NKxKw7tQ2 zID?sKIV*KFM=?{`>qo35{!qgt+Wmmt7V5YJe~`G}1l|W;#X0-0L!p4Y;o6Fs-IR?} z_Sd5T4zAbFjygd7)5xlw3mWG*^?Y%q%8}-!e((=EI6pk#jQ#BA7&nwTIg-T6DA*L( zN`GHHXXR|jJ*NbB`8sPkWLAGRA+PU=pfO`tBL2|?8f2Q;C)cbpsFKmIX z1-fjZCou96iSjTR&nulhkKFTxbulaWH*$nE(OAufDyv%FC)cZCa zJ#P$H_a3Mn%jmT#-LcNCeyxXZ2)@7BI%?xfd#7FCcmCa4rP@1$tm>y(kEGiBiBte9 z2Aib3nQ|`eIzGg_VZDbjFyc5V`5(nHJ|yWbd?x65$6X#+RKtqE3S3wW)-tesuope^ znY?{ay)rwvD&Trv7)wXtX@X~NJ}9rnOPB1($$elH@@tQ>j{dGpBImX>GR_fO`#=Yqy> zZF;Al$z}P4T{SRqv8b|N9NNO?gDTDmx%gx?SR72oyZEG4+@?Ic6 z0{^xz{Er&K zsxJnisa^uP0n5mej7{!M-!8Zxt zSL}MyeDccMls|sJIhVejhNpOM&^TRqAo-l_L-XL(;L-$2DqJU?q$C@~=~X2T=-V$j&^KNsqA z$v~#LqzqXt$Vw~)jen(Mr?uYojIm%q+e24DO>`N7KYM?A&qSM3G+hE;^`BF$^;L+D 
zA4S&x9Eo7%PK=9aYVCc_%K4yYj(;J;dWf)Y?SCULc4WxtL)G4*F0Z;U72skCFT!r% zCzm0}mw1r3MEj}Z#B@KTQY zUB@1;j@{5kUJ){y)3UCSq@jZEn<;CgY|1W6^}DsVqmTDn{d`F5J&&BwD?_TE?Y2ta z(pG+jNR69_sxO~S(QN2`dua82->*KgSsijZkP{O*9Qk~j!3M#~1)xsHU_#$Z*%W23 zl`;;Ab(_Fu!7dX2QaXC#lh*cN+&AvRD)Q%%wQxcxH9v_Th4zDkeUS5IeUyqTdc^#z+Hvjqu%;=JSd7fnRvXq?J!@68+#kO?Z`$eM-5Q z^X`o?%5z^8GOo2{yYd6;2fa{<(aM!dtAnL47U8Ld=Z57xsh5ADV2Vd*JHeKaw z*fCYVhil*OG(lMexl)H5@_!ag>R02B?;>LXtQYKCoZC8oQ^uLl=dvlC_nMIPetr4Q zrx2_SOr{n&1SmLBQ%>G#|8 zetwqv{RBpC`ua0-E@CF(3%wy^yiN8tQt?K@nYAXnJK$N9#bkVi0$Ii=dVJeIuhHku zS>N%+mQVs&Rmd8CbIAC)7)H69XRPw8j^142q71^?Ft*4WMBdnIL&it^bvtKeZ@`nA zJfA^cFZ|Nhd6D;)kWu6>PjBnzGl;7yLRMLj{^MqzVri>#c?OvE+reYbSOZXdehttz zo)|LD2tm8tF8VzvfA!5z%2zf8cKStH>mC&G%Sq~~czwut6X$Zsd_D(O2UagY$|o#` zVEZYRBSa|{U(5^9ptmts+I<+f%bL+devGmXWWA1aIZF9d2>mIeBz96I<0nTlB7wwI|GX zv27Dr37EuA_n0r^Yd2-3l>Jgxczn@dB+vmSnZ z-5&h%a$8=zvh>`Q@iXoHTuiHSmx3d*4D;RgyyNljE8Xs``oV;@7TS;7c3IXBb)I2* zdWL?elRoW3R@-SIUOY$SbW>(hc9rPLA=n_;CW3}D zZ-{+{!3M!TEE5vE2eI`DLBCe$7B;XDmspsIVF*OEX8+HvKzz_WVp`5F&uq*#Z-Xii=+47YCgXMjIb-iF5f(?Vkz|K{OHIIygHG+*E zcpf>`Z$Cxa5;;1{*mDW7S7fSI+Ifq2U48koY3KqkLeteANDcB5wdwrYZO6dJ9Yf+4 zsrlDh=k>z32|ku-em2#{Sl~%NlgeB&jhxy`L&k{6InNa%yzTj}SNrV~f4=K4MjV#> zZ2SrS_v>*JewkELKRp;ElyettdUnROg`ZC?G8R7+QobVo!PCS1waQv3E52;Gze@l1 zQ5L66?W^)#=E}`r*>xdfiF1)f%S9L)#+FZvQ@)3C88dQ-oEfml<@P?5jm?AgfvpxI z4xtMjL;ef)D?9E=efeOu9|;-nvSTc{z~5~36;CPUtyhGMUyBe9k&^%``WQA-h=tXG zHG?^N$#`rA>jm2{^?AqiZ`>2B$Q*>W{z^Mv;JaYMU`=4BaxRBp<6r||YK>@PGhpLj zDt6e|JXq)|J63|l`Gi(7e}VO{;0s2@SKZ^-GjVsUSa!uz)o&uB^=j`ptp)1`D@Uvx zqGuD>6j)RMjyRtVuvxIL9YXzI%uxS0GRi&^GHwx}98%W|Ski@w?dQSzK5P2~ZEX*% zSKVseDG^AhFQj|IUyy_Ue9mk87+4;d6OSa`SAfO9R)qNC>CfO@L3f8zEOMKn&;4TB z26bSaUyK^MlmCZM{ zUX&#m=z}s;#CAi_mvp9cEva)1ECyD=xzx$*&tGf5W|p#g%KD_tit7YjRfUz~eAKfZ zR`rO?{9~El;X6b4kXC2Q6dg(^o2TqVDdP~oGQpO>-Y7t-9xIRHrcZ?ss;vHp*62#_ zK}mg`HvcPypZe6bKbwA&P4B*bGGf!au?et6bZ}#{U<+VUha95+f-P^ALM$x8kJuHU z(@8F@0IU@3WEU0#tJbgzuqLqAxaexYTEU!kqSV_6)(Ph5Cw|Zd)&Zu*6T3|xBA^Rx zKlp%)RjQJwJq0!h=J=V=CBe3UIejMB64(fs^otyV~{Bp zmw>x<+5{E@bN0q$ei{KQ20PuZi(9$c1E01k5S^wePf+gIIu5o6%mjP23?|D*tvIjd z^tkKN?u*_jU2SQ`m7mA*Sifux8J)Hal2CiEJa}1Fdc4MDY}Z2D*==7Jz((p?e~WF7 zSn!-I=M-A;&u-`=Uv{sJ)6WuZrYzg1Oxij|SuSPn{+m;XxQ z8D;m_Wmf(w+f;;2mN9oYVH5c#G$!<0z7jItV&_XK{j9Zs_8XkR?_}8Ds*|?%A*=VR z?6-RSRPBQ>Lxt^^>EEpHgY*iyv~>#py04{kF4capRxr2im%v&yEcXQRH87bXb*?4-GEHl(5n7?`>s`Yi$00yZT;D!$Nf+~*cZ_1h2hejAdMN9yT@KmVH{<2d2x5Nr^v5bOkn zSlBRF3{1_HHr+T_8JPHh9Kt&TRtn~h`SV~E8eQmBv`@qG!E|}WU?OjoRA5?hAr4jz z=8hHBV0B=33LS^U>jtob{*dt<0Wj@_%D~*a#ks5vIw>EZT;=Luj*l_axbs8-YMnm> z-57LlvE{S}t`xUW<9ULzX;&GYsO~8jTO=vlLz$De3bq6m`BunytH|Pz_?gS(Pza{t zmqIOD7J(Oo%Vh;Qgue`|2+Ya<;$T%a|AU+_k5g)nv7Yx0e0y1(BFR-bVIML^k$XW} z27|!9hJiuOcEuhUpJUL^Lw~-|d&Va>7+^XzeMPOk>Un7MzMUSQR-D2|iK8hO{hIEg zF|{95%mX(g@EYQ3dnbm@62`L7e!z*U}{c6KKG+5e~E)n zfvYhK&NB`Qud(Ufxai&lJ_o%sPGzp?0F(NjpUsp%*Ri-8lgtqsE~`j z>*0_8Fr9k{)&f=r7U5hD!Medp!JNED>^BHj0`^`Za@Xw|C+qc^W7b~U47?`1pZ9J5 zWL7$93RY+N7+?}U(W{UfVcLG=)wL9?73^5ii$mxVV4Yx2?jTqlSO=Keo94UNvl(m< z?CRzAY)j3P!BCdotTy6~$Q*`$c2jx`HG++U&47Jbg0b6IocycFvyGD-UfFWy%C;;n z9P*r?I<4e%!^)O&P&sY&4_-}(7adBYjF+E#ZIS?61e0dTAvtCp*aBF;0NgE}73fv- zmBf@*%38MA*8o^71(_6urw_ahT+Nk=hs$u_QZ__c=oca5M$RpHUr=SGe4P+^l&M@M zFhJF#9Cd17J5$pB&Q9 zQLu{Pkh;$6U?pHpVAl)4A^l>4?E$M20ROY@O1I)=J!R3~GFHU4^b5I-lN0|rJweBm zoL*@2ZVnmiMGk>Gz%3C3VpX<-(!R^p*6zYx`Z=4_)nTblPh|*9zSdOkJf`_4RLFPZ*Jk0_SSLxrrjw%UEH?E2}{n2J(1Oxb6| zk2A;(et@egbdr_hsMsG9yVkbeM$!g;k+z z-o^2dG3?*}TFw@%%wV1-nd`@q)%d57^~}U_TWh`)*y0!6OOTWM2HJz1i#fO3lgfj> z;N(Fdse9dohmQ$g!9+Tz61&xc<%6j_37xDp4>qH$g|afrYVDZdliS!o5%a8fW>)SM 
zSGM1ZjG8|mexJ%5ya;dhU57tjU{%-mO5aGlfqz&4cfY&xlE5afFIGd^1YgBp$ld+t z1%01nI71k(3tY0Yf!l#9SJqiEXB@dpcc<@#b=Hq;qjR-fIfTl6=bWM3t&bA$bi(t^O#bY>Z)1OkwyFCz`jIs=#a!es zOY`U8-+g5r;ChIRza{F3+)vJQXmwIht=ra$g6NGJYVu2eobB^M0`nhfzAuBdhOm_L!IH9L&pd1`5@P&IND5-hVs%7$JR818>=! z_u6M8SPNJY<oTWn3E}(pd3!gt7_BoG~kPPJoTO zFp)6}HU{=lTLyDiO2!gp`G5EBkL9v~Pzd&Fq2~~}gHw4EI|#i9MQ-HMNKP9XXdn{Hz14X=OO| ze6$s7?TvQ#yVovIV?#AEYV(fT@>=4d-dS?=@YTx`ty>LjYMsl#8OIzC&VX@Q1U0s8fW zJ|{fjtM38k1gRtU6yoPULdMoZt7C<4PDUN;{CzVnbu?4Q?DO2e6y(ctf707=U8XuR z_`iBqd73&VUI-cQ7XMcxL*{RFSv_?h8$e*viFtexU(b!SMzHR$j3L?`GoeeZHFz4D)47wJuY_OLOZEryN+7@`GUlq3QV?{<0pHZ-i zSBH(=oXdTV4^5$O`HtMAUYp!#A;JuCyU;*BK zri>pET^2>(6T{B^H@prlqH?j^cM(s($8A}u*rj%>oNFDw^(Iu%%D7Izn}ql8!pq(z z*TR@p8Im&5s!P2##JWv%XoY@rG;BTV2z?DYi2eJ(hrpi|IjC%1ze=j~R&uRj%G*{E z^X&4}_@t5E>q5=my%T&$^jv^{68^6UzZIX+&|cdUX4{;#JsDadB!#8)*GXYx!lrkP zv2Bh`Rr{fvhHgNUTV+2^(BqO@0!=IQ=Ig`ag3h==54Og$tlU>Ffkdgya=mhJYh5%9 zUsYk)+6U$duq+lea#y0npGnI4D0BQ-usN_^F!%iei(ms9UG^I8F9Vau8{~+}2W%5q zrvP+BfJHD$UatP~dO_*03TSdq4qMOdTK)%#LvO8hd!e0z z_UmbzsDzocIaV$MIqn`Bfrm4VZHW^z$SQhc*eLLy`}Mu1tr><{o!GF5L~`j(X}c!C z%D@)Ee(gVwE2Jac*NUR zC0M?r))`SqMczETeegQ#RvCw()3_c7reCWr0N)Jmtc8o9je!k=wQ%l@HL0zAMxo`W zwa`Z18n*6{!FSX>q*1G^g|afrJ}z=7Q}@Q(Wqp+OX5bs5Y>cu`*?djP5iOY$l;!i_ z=Q&bF+)yKlzL9n(DN9i1%)!#`B`_1r-EP72${1hZ>iR!&&Lb}6gBODfy&B&H8tb9A zxGFO#o1m-_I@%MrUOi`()l(KdC2V|H%8{}+DC1pteLVONm(iusjJi0{khGa zZXA_iA^k7{-yZl>eJMWM2R`ihfHgnNK^u#Ot$W#0wCQ-a(NjfC*(?8a)*J9R_KH(y zG1xTNb|>;BS|E|%V|*I_8y0ciW7y-@1YXalbI8%Y0- zLpuR&j?i)lHUl;e=J>B*^I%h8&OM8Qh2Fz_;KHP@^T9;kXE>L(%6eQbdI)_9WwE!X z=X8nXCRiDm+B39tC2o=P%Y@`ioB!*bHYG;2!8Zlp`~Bmcwyq02?H})|)U(eM$V$E= zY&0NObY@uLYL)i35O>j8`$If(BPW(08E256l!UE!Zl`#7L4?Y;xXfXZs{ExG+8Su( z61#Iq8!Eu7z(00KZIHLi_#|Y=5!?16tNGNhx=%pri1OJ4HVAgKE#GR_S&oiI1vKN( zbiXrf+|0ShH~r_0K|d9w{@ghE@VnA}FTPL+W`Lby%XY5gNNy0PERV8xNg4f4$hGIb z)bEKHVxtD=ilMtLlZ|@4bHz_G7)j=xVPp-KhK=(wtsQuNz&-Em%J8z-L39y$ArkY! z)5FF^{_>Vxs}K9W5Rm+YSX+v`YUB-_Y3G67u~3y73%~b{1-3bqoqFM|KPzk`Gug?z zXY!W}W-_F2W{?$GYsXzr-|!<1b}k%~_MeIWLf4)_9*9e40GZ_+7=UDTS!jT4`1y$Vb^_*))P8%N4V5ALU|+QZx*>6wR|SP zI>ENk4+bMQ6`!p!pkGhj;A^|J+jq!Aa9mq!UvXog`-q;f++(Hk^5V0I|CQm?dwIF@ zl8B)a!dz)EFWw@!-~QBH7pL+r(xA#y&Bd4Chc+S z+zQsIVZC4-8ny|nO~XdOS~Y9}tVP3S!J0K}0Zi=e)+MqQf6}l5um%l_fz@kR1z4Si z)qvG%SR+`ChP8oJYgivx6_|V8-s~f9t1Zu&^W*rzq%AL^kL6`+!~1-56p3GXXJ;AF zbzy_;G1pw;?B7|p90m)sT!U6krz#Jug17K|dkw|tc%MwP98o@vloe4XF*lvhNP?hJ zJ(ctHLNiqrww~>^VyDa}iopH+v5b+e&@MuIp2(vvCVTb1l_+1QDa$@UT|%=2O(`_h zocrk`aVUBY_p4HeoLhCMRlSUQg~chaMuxg?+ZyxIAhq_3!cs?h#{u*ZxgF39LF2lA z*&|~U<+BHoF%C`kd;RqgTO}zkqg*}Lr1G3tyM#%J9U+tcrj5>cCo`$b4S|e=Gc_NF~&_EEap`u}rUxRqt8Zgsi&vhmCGqmvoN7yN2v}3@1_k zGYRb&wATx*Rj=(o+&FH%^Ikctw0jBKq6@;t$wJE^b7pQO`-x!c-eFG+mp3{ofsq(= zrOaW8@GQbJC(v@6=xcl5w>!%^A`1HmvIeTdso$l*_j$zx zLV0DG{FC<0Kr?b-I!_m0n+F>MQ}_2k2cI>u@>j-X?z!ZDBtoZT+B48{t9N9$U@(X? zi4Jwho2v<@-oGGNGuSMcdkxYFHUlQ*a>)MyuxT*oSp=D5hirN4geYZ0Z*2Oa7MY}7 zN%)qKHIvC7wRUB?A5+>Teo}fK{eOwQ*7o=bKdz+9GHi;0$Szss{h$_ zyDFK)*s`C8`Xa}MF3U%rBmp2Hirjx!!QxljK7bGUM!MpH@aHjXu=rN(g z-{Ntl#{{x!J{2|sX+4(Pd)c}prslB-7u<@Q!b*jr2_$YKk&@DjsXU;_r zJ9>$>DnAxor=S_UIz4AwZv#c6qkQ;E;ZZyqEZMUD&!uS#>Yn7ESq^-VaY@ z>IWI4)yS%%{yY)RA@Uo*23o?#_cFyFmK)AugLfL$WC*KGGTw)gm5cm~L^!r~`KJ5Y zzKRXA&{jZuw$RcphxW#R^`2pCo5L`o6lYhF??U@yo0hqOclp7@J~w~WIEq2D0L^jY zW0sv#<0x>CGmP+cilzN^(9d0yu3uiIRRP`tUT|&LnC0A-cH@8kQ{vnZ^i$Qee? 
zAm<_noq5$UuAmxb{Z{B4G_huTFUxDMPdH;)`Y8K-tVf_*wuj}8S3h%2E`(LC(p7j7 zE5_F#D|>y|_`K-ltuysdn#83x=n6j*HacuN@+8}yT!``ceV-xdJD{%@ddvQn-9*Rq z^E}nAai;Pe8Do3k9f9`}n>XzX_Iu5tj8T@yX59QC>LjAX=S_GLpH1%@304bcx-e;P z6Icb9x^`&AiFA7_omiQUkAu+nK~JjYZLj~n)^w(EDRbW*Wc7cJ{aagB@Qy6y*kYT= z1;nqexk-q9Z+61@#O~EXqO*EPv4*ZpfNm3T#8q!7)m6yFXJIo^0*f2Xrzt?PaWPo zj$NuE8k1tJfOZJJP4InN+N-_SNL`|34A{T3`PnS*3YU(s-ftvw7Ll{m&NV?_Ic!Fz zBt*_+kru*&D*fWt>YN@RYBLu@c|=D669E3sR z+D*{DIb_Mv`bxjX_eiGuwPfDRzKHnOmDUNT_<+(1-6TF1Q$9}lGn{*3kN;S_-LL!c z3$<6*f~>|b*|9bi$F%;x%-jFOTFm5e2`&7pP^adpY2?o!e@|LhJI=5U-0nD|HBU)@ zL@s82-e~W+piet58jpZWSs`VElsR)Kc3~E?^29i0Qkm?KHGL zd4DRWl=}Lh%k6gO=pRwKh>Ri13Mo^%DCugxV84NDP`T1<@a#kSUQ$jvDH2Q-s^Gz6TymjJ@Y?6J`B&Qlst9GQq2J+l*K7~O3Ex7EZ5WLSu3$g z6TA!X%JNzc!8*X^HLM?OkA`glOKR9C*qnw6Af zX!AaZy?X6^HNB59aVr)zCVAmWn@iK4L$_bFnKQq1PMy4FzdvEN5@AU=VyD73-kd^(luyKcUlCr;N zy`km5!L|&t)V=Ow$m;FmKAJ<*D=)*^qE2j+TT6cVjj+0=n8`NHUel=cjS1fXd?z28 z4#7-1h;90i)z%-jexu{ywz>a6wwXm%$+yGCV~1wv+YThF_)_BccZiFb+UVWW^uGt0 z3w;s?TaY#JU3ZMVTurq_(tgV3DO1;SJ!6$j&^1`aR-@2%e2+bB|MqD)W$@j;?L&^p z%Ki}f@%OpDb!f8Qbs$+a$Ql@AU3O@)ZpGC;8s?u{abX%cwbaBU7$@Wv}YLHi4$H=3YkrWL( z?-68+&+S3R_)lH;bg(5tb5*_+sbjv|95(*T*2B^{eUHoyu62aSDusRk`Zo$aLY#N* zihZjo8=|aA%Di^tZKsxj#ot1P@4m1v`n@(>aGFxNkSZC|VA8vBQ7k+jDIZv{V8-Bka>GXl>?L%zD{*XxhTFlRK0 zUgOA``K^5~E$x)wTa>j3Hpqqfs$#ccVjbP(ohdZV!;N1*w>j?L}OaD->>IAxi z4q*e13lr%8c+&C!XkC-2f^{A_bHC&Iw5Oj_`@GtEna86VxD(`@m31HXTg)O)>{szo z;@$7V)^GKCY~sDYHx6wRv}4eAaPA$$Jou{m-qU4t-&HQ0d>gi5WaR!K!@ZBGJv3&f zRIYA$fifE%De-L)8D+@0Mq=+e`*&{Fr9(mRoB%(}s`g8ElxH82>%JLA#s4elk6Y|D z6?(VJ!^ntg<5NvpFJ)l?QvRH>u|8jO6LT`FV&y!qote!FzA}q%=kx7MzJ1VL@{b_j zKH$*#-@U^Rjk0#3+G%tsxFc+Qh;ttul*_o{o|+%CKSq4rkzSj&2U=8{qKGJ>tQdJ; zwdJt~VC~r0FIkiElz?V4G|v6wVy8N=VK8TZ;CyS!RejJ(S%muLMP|yUopEo!v*Vkt zaj(X+w9fLot?-Y)-zR|OcQRqit8-mH&|tl5FbQq(&alyA)28FmR|2UmCESFo0+yR_ zZbBPvAYQ;%eQ0y^_cG1V#mH(zR{x#$zM|D$YO(I^NGC=>&=&C-fWGLD_>nzUJ--|L zcidAP)21DVHv3Nprf zo!$7rZbwt0ZGyJru7lG`{R7Z8{DpN8=P5nYa-R@Qg>DSG_~gOq_CS|^cUXS%H7zgE zu5NG({4L$?ysNO!A<&gUH-68-<<&zsc<;gKx}j^C3a8%juKFe2-d#?6hoP(b>%r+} zpewrX;B=vn({cA7oURzU=|iBahHhy3!1BZwTA}NFfNS|~zj3W01J7E0BD9;KZG7e)Q+_n(DZe^9U7puS3_U=33FXlPlvh)pOS#N5Df!o> z4*lORhIe`TR`!{t>B0KUP0o*>(4a%&y1H_PP10>b0v! 
zSKn#&oc>!(xc?Q;{u5`s;%zx6b||)-6XVAA3GXoyPrSu2PnW;V$Bzh^xnZ>V$lC&VYGcF%gEVq1Rb(&!#&OAiLN`%ZM)#uc79L6v8x}dZ2#Ab=k_FyjhH!WA28Sa zE9a(%s3DPckJ)~E#Fm}N`lGq#rE6ceYg8(?@FUEg?7xb(RG4$aPo*GhpSfmU>J9go ztuV}u5wmOD+?J%S(Q|w9a$!Wf=dKN#YyNTVxoG#nwjk{xix$K zy0wX{Nwa+vt(43});RTDbBej`S*qBX$hx0C<7`JF>%L2Nz04!Bp15gLDa|E;xXzWPqQE4gu+hvri*)q8>2b#L;fOM=GpIiHG~SC~1U{L>XV zpL)fHYhT`fMNZ@0SL94hKA-c&D)W-nVHiShcI`F0?n|!Tw|{>!VYAkxSkKj1 zYhX>+XKvWv95TBWw^yNIL95bm)m}7|8zn3ack!wlDzkhZ7tLCv?X>Qg$Z2BlW9gEY zFb+uG?dF=hKOI~>y4~z4kC;8zWML{Z=bZVu1Y+j zvVB|Dx~}oc_SvP%o~9*JhCnia3A4((ZZB_tJZQG>%f2@IikxRhS7U)^@7(=q6I!$9 z>ZUa2Y7b_s-q&lH=~ET^207aXme@DM-`ahn{H@-GX6zEOn7eREH7s9?1)ybd2an2U$$M5 z(=`4JC*$rZbIy7$vF3%G_BI(4J;zw(N19!K5l1yw zJ!?Ja*{Cz>ztrLukJVq(p{lxEMB zisoo`mkb6t>1o^)V~gycjk1E(cy;A1S?lPQkk`*lBQIydbG0rFIkmY zJJ)qBt$Wh~ab^DOTqAK>UCXukiLSk?Ur5O0Fc!I_yte$(@(-0?cIp1d5fywye^st| z_}Yp@*Hei%J=Yvj;`d)@RKDra=A+H7N2RWp_-?ju-=FAu*6ey@^*z5s3V^%*U2lAt z+BU8=GN?CecasFqmn9<6{WlAg)T~N;o_34QCy9IvXsD1lyeOSOEz)1^Cd=*N0h-3HhfPb*SJYjzf&BoKPW#ZR(&`5rhmx1M+*am^%?|ah`#;&=D0Tef6ic1X<^ZGqC;AYq7C(WJ>{d;Lp z*M?5{w{t@;|CE58YkTE?GQew7+7|=Yot9jz3>saEAo(<>%+H@QyFSsIoV=L(MY@!n z z8Z~EbTtAuUDSMAhk?lKj&93$1z0_L1bA?F8p%PtBm|YJhy8e;axOO~Q&faGE#uXbz z*FU&>v$^r}L-3Cz|MVU@=lU_RbK2u&^0DRv>5B^zJ*RJZqcmZEvfxdk!l@_IBD3o` z^DVD8Pa%114o6*}O5bwg%Wi*T?CoT*byB|cJk}{yZ#r@_MdVR8RnF( zeUL=ZarHf`@7#4U2C&Q5c5SO9Um)P2t)hDT4Y=E>io%*-|6$>O1phr&Fv>@yACh0? z(l+!h9-cI1Mg2HqAb9d(^W8Ew z)^*L7t9X`Vrvo-y|38R*cef=6SxfFzcNiouY*9@O;U3etnX2-+}=a zZ=*F_?f~~TXsNWw`J%khjRUW$@T{w8#9o@rv~i? zy%z2vxUB}+tsx0it=X{@}$T4Z_* z`}&kJ&CH#)rIJtc>cHLe%l%E94f#Jg(Ix-xSX%y>Oi8z|CwPaBC!fKd_nCViF<0#{ z*F17fDZOvzR6K_k_q|?5wbQqQN75_|`cB);dQQKrgOL5L+X>loypuEeESnB?hi&?_ zbl7@=j`(6S{2CrjjH9eQL`ik*_;HaWI`_%g=s5?acCJ6EoblLm`Xy9tyIzr9?;EXY z??c;@H>!$WDZ}tIZ$(S5rlR}FsaKCW)M@LGU)lbDpK^Yu&YZ7r!(npzZjbzN{aX^N z7T3Q41s*&}+P=@y!S;yn$!sG-)3QKvA3HKT)%zimzge#ue0VoW5<5ZR;z#rrT#%P{;arSK{YAr(b{?(miSv+G?C&s}VCd z+_C=cILzv&yOMTu$w#)|qAat@(&+DY7up-xYAA#>s*$=lv7e!dM4ikfSt*B@nWtSF4^4wL_5 z{S9~GM3jx?x)&U>)JWGrd`JG7F8TFt`Im`&Qa`%Ksv@V4UTrOm7&%|RJvttp%(+>5 znF^?qDT(o4Wd-ZkikPm)ldlyC8?y;JRZ&Z$yAjA-eftM+LFo1N%|%`9dvwj=w{PP) z^R5Q=O7Q7LX7U089Kd#J&C>SxMwjq;qEzmQ0N?AjnR z^UcF5Nv=t$IByIRvvT8&YBHtwwmp_7Hc?v*2B8=0Y%{ys>(DDT(9g-*Cu`T$FOayMj2f1^ z{N>J&QaZhzFp*xrkev04!gzMaQ7XJY&x(e%YI5W|L1XRexx`H$nNRe5ZkC+X6BDtUu#<0uK5MOc%$MMibwp2S+uE+j5?OyTx9ygk zmFp7PB`PV9{myliA^^|Nnuvd`9 z{9!Fd8(54!%i@N`=+3OJeeF+B{IatYeMl`u8_o77S&B9^simmGWIgzBFAF}kntV95 z{(M-iKOa`>&xa=|Ppv;6_N+f2o~KOKpNWSJ&RBqMX92o%Y5h+yWb<2D^-S|M*WC6* zc=gl7GpX+s;Zf_)Q!JJ{7>>{A>(9!ZFZ?;N=JxgBMAu(;=bhUV8s(%n5iDYCvHl$Y z^g^O5!MN@TSGZ@=C0nKJeezn4DDQ2jK!3_CF{!ZBp2w+k%vldzW;_t zUUQbYx3Mr^?LZ&LVm2do1Wc@3CCku+*uW*B_A4>j| z&3znkXGLKhoni35LH-NSXal*D6C}DC3-jbpf&3{nH#SQgYA!6U=&CO)kw2y2WnHy} z@i*-@`B%Xw!KbS7<9F0c-*0;=+Pq@*J?7pQc86BqvzKYIqOh4yYgb2M8#OTXsVti@ z@Vt5DsA{DzTSuYHUvPC>|74A^vAQtSRb5z(8EaOLuK6Qz9nrPr8_%fh`9=ditn8Vr zJRxY9w?NO6g!}hbT4!}kWxcEh=+Aim6_q)koRDpwFJJE6_Q@}A|6;b${MJO*qPZ%W zSd(1;4w-MZJzFRadIozXr+&J8^+O3Z)HeJT`MaLvPl8lHfH48?;}0R}^a3@xCEvP= zuU*?!`6em9nLkL$>1e^=?fZ_scBS~;7TIFj_GD=Hi9P4fZ`&0ZQE}qlSqc7pZGSRo z=KShK*$_~>iKL1K)2LM%T_3LKT4ebL?U64+Zuh`m)dPFY-$885;}(fkFCt~9NZI&` z?$;29w>`1K%o-=NUDNvZ7&{!(=-GS%8?qz6`^G0H{T!ljq@SEky1t1J$&XfiS84Z*JX+rFo$ zvTMTZ`m1CbU9B>F!w+@1pB+V&F@tNvzgkf@eZ z@c{d7??|lKmUGipqIb_nW>?=?*>y+dnlbW=uJiU_owMFxTjzR2C68rMvipn42S2Wi z)X^gP_ne;hIP1)Z@BkK{IcCV%HAriQvTod9xlYc8GZfu##bX>zMa}Ie8O9Dpq_o}e zemp+;Nn|HKV^y%B6G5`B-B?nXTz@Q=J0(_{8;c9eC4QpbmXocxe1^o4eByG?Iu)1m ziOYSI`N!p`Eb>%Pe(Jca+q3?-JFxnmavEddBv={g?67-nRCniX8Utz{QC5wyCt!{_ZjBY+oX~AKDqb)@>Tn#(A@TTz&!G~^7j2K 
z`yT|loz3I>nXML)+hn#I>=rXAty{CF`NYbeb-9T(kLBF-Yw7u}k4DmQL2bNQOOKy@ z75^b?XdfeMced=dC!ddD`nO$W_3eHFaq=HmT3@yxjdkXq=a^$zew7{O>oxE z)(iCt{N4kAa>YsQY&~Xy;VS^I2DB2SUyd@SNt&&~#$!pgzfNKD8y3FB!dJ3Gf9nwV zx~|#e=Ty-x;!VlhcsgU(5Lt1VfX+QnUe%BM;wf+O_Dki#;sm@?M zefkeSqHLP?aV+t7$EH0N_}Fg1`IJpR$S;GW6HZUOD{Z^&8q7qKStB%`fPSa0&NbRw zs*L8Nl8?WWzSu}XrIqO#M$`^R+`UY?X!iV^Wx7$h6#-U=QppPL`RMKwQu|$O-m>5J z@;h@nR0Q7m-WZ#=IPHo=Psy?7#?Y~BxE+hnN6ekKi)*jGGrii8^`E&Rx&D>^kGVI2 zkE&Q2zB785oQFHB`^s%j3ao(4RQCX z*ZsO83M2%=A|i_jhzbb88Af(w73TX__n9pTUf=ikKHqb>*UFqey;N6MS65e6_d#+Q zX>7ND*zL6c`%?G2cmQmn-;D8xaZV;Zbn%seOR^&`J)321Xpo0)A4Mmw1Ja-T#i3JN zY#2`$ftXgU>h!$l^fMDHZ7$!Ekl*?pnt+^|^fsKu64&3nn%EMxV=JAhsOG-U2%Ob_ zFP0E!omBd0YLW*McDY8Zauw#FD};RP@9dxVLrI*e^_dPs3FtzBFq>Wszm6+NNeaqj zG{!HKX3XQ~STr}coS)d3=PA+dGKAFCMLB#2A~0Gs$SU(*K5`+7U6yh0H95IML{$%b zr5khQWSC1^S*yXLASwAW6rD>pUl73iGW%b%PYaz*SP!U~l;n~vWqya#j4 zl2*_@*{LiszLG@RoNKiDG0LljM%s_`nH6n?Xg{a9AD3}&b=*~MV!?2g@3uc(#Zq^+ zkrpgtE*33wzaaVSp0(1txB|2t*|*-@aJFn2*T*{HVhX1!%)LG6V5pH*Bt5ho`Pnf# zWp9x=yYp5xXFvX$>DfLyJ-_-YetPQtU_W-{s~yS$%C5CZ#}kgzM%OGUurPW zj_w(7HZZ<*S(DGb-%1YP@&Myq)&s6XwYcT(F~gV?@a{E$_|_TZ>};36XLd=!?vXdTZ2PP&I(>H9DqICsBX6}Zs!|H+k7hTz?JC&q zx!o-8QqzOn{n|wFZbZt;w9&Y|@hO;p&pZ%&?>^nBecz}~-FY3aWH)a3)|v9{DN3!? zucl7rl`L13lK#%(ZLCXT&k$#(nx(Qy!tf?$0RL4PIOOeSHTeeEQH4Jv4h~s$QO>UvApY z%d2@=;yYxn(XVcM01kNJeK=t27m+?EtVc$PuiA+R7LBFQ={sV~r4RBiJ38YsL^v2e zY!{mw;u}N)x^l9emD_ThN>e_0APw4%I{nF6&OmyW@)VOJE4sMERx1}uBI+12Q#QOE zqw*#2nU*ZOJ!gunN!w|yEFR1&lfXr-J?kQCk0WmF@g#YEl?5}a#JAKeYYl559dcux zDDiEIYhLUFtVE|8<%c|NRZGWZC8ClSowz2Y8Do|RRsL-V-U*B(2XElP;%)lNhQaA~ zWn~B{QeC#iF5eoLZzn3WB(T~p5OgxxsIYS>s}jBoB%kLLr~F*BIQiF&y^PbxPe71ff%MnKK{|0f59~FO z!(uV|QB*DzR(A`lSh837Q~d3o@U?qPb3fJQ3iUm|<^HR6f<-#8v(Z&qWinRcd=l z{YX+PRBEA0?IWqnC3T}p9jH==N$PG%g@BlRz14quUKV#;v9`qEKo4BDBTjNdE@28)S((lp7yBCXV849u*o^F*6f3sGw%c6L6i&f$@RWHQ7pzk7^=B@Y=P zm(%x?GjAzN=LFUfHh9EIm@nc16dmk3SHU6AwdMzxMwX?Bh3mcEHI^mhvtXPkq3>Iu zB=qKTgs=(vvI_Nu2r&O@h}2uZZYi+_--<-=9hQ}-y^zTp?m*lQ_A+lcV`Pd^xiPsa zV2BNCmh4ZBRln+BmwMifV#BI5Y|E==Cb7g?hWZ3~p5-8p$+ub@%HAW`$jYgK%|OP; zG%f`p^5t4L=8KFho8x25#{23K z^5bt`g6;E6XTi=9=NmrLm9`r@O1Aq!e&?@rVnV*(9CNCu9io7Ck`li5WFF4$_kb+4 zl{>TDgIvDdVsHLK*o|UI;2zb6XNHy8p_*WA#<>w{ z6ia04DVAV6))BcQ%Puk|!%SvpD(nA1;^6FM??CMm-#%w$DA|dE=9KWJf09NdirqdD zqBBEH`bcCUo3Il5*WW-F1KWJo@K&H^q{+){dzpxw2&&3R@e~Ql68Wv-4GOygh2i`i zp{9{|Aoky`g5T_AVx@8T`@f&Wnv+#Tc)izGW_-7aZ%BRcp95sn}}A$ zZ^MVaDzE4CI&nYm^dHp;Gq#C~*fArQ8gcc-k_^ zD^dd`a%&DDZTai*wB@f1jwe@%?>8CD_RdvX4ByX!AR^0{DYkd9*gIRzFG<^PRQ{H1 zv|8Xw`vbd@?0(MWOPQ{`-;i)>$q@%dsu+#F6?@-j0GaIav!=_R%qW^S6YSkDoDrJRk|CKC^HsQVrA$gTRnSY9wA~dpsA`V@6oKi)tM1=h%UFG#KXWCMWxJKHy z7IDAD;qKz4?to$iXl+%ASW2Y!xSl7Vc3eC!@lqJqjDmf3?-Wq%&8DIR0d@}iLKj;R z?;17~$0dc`#6nw%tvY0npD*OGMhOPlae`557nD)jVYC^VV;BX;JZCUU6V)iGSeN+v zWV}`*mSMr$7>=@)hSt^wa-HE_QL3#%O)nCPZ_Hr(%9?5 zmcN)!EfKa<N3SNDkxP%;xI}m^J>_z&qkIU>^8Fy`WtzNzU-#{qD?Mx7xEHQ%HD3i z6$jSoXbu(aWIzFQu!#>*OR2ndB9jU#>|QSk5}Q&cc4JvxBZiV6q*D9W%MYwI7A}Oc zVKs4_zUxz+zHu3 zopZzimxl-T*ZTPfOwVdI=Dq>BWXz)B+#34c#~-Nic~=bMYJ74Tv>{)q#OqBGXFv@# zX#I3zt4$=nNTMFJOaFx~6XCLvq2Df_#M=e>t+`CUS~g2zwq)6{utQOP&g&|}I`e%p#BmS$`E~sW zjF5!~D~LTeWpwvrVS8S34nHf^PbNLT?<|sJrUbvgw0^hecfiV1$ZwnVyAQuVyF%w5 z$nV1r{d*X{$EftOoKgmX$Cs=}F~sorp~_j7^NN0)swynY`A9#uvNBib$B(VI%k^VF z>v5xg+;2VZ){mJA=dv8pv1#Z!tDy}2_^g#VTR-Mn4T%wv%nw_Sh5GRwD{~+Hc(3(1 zP(P+ynPvJ>c8t|fseT-5y`8Eb!;${-2zy9B4c}HXwR3XPTtA)qRo||bHSexc3<;OIPci&GH;U%^tJoszH~lGu;=8jc?&hY%mX_o# zqOH%^y1;s8otK=XT8P*K@Z$WW{W<1?f{qS%z0|V}C$Vq|_C{+bqShE=9oU*$K@Z_ zUR=duit-n=MER|7U*PnubfW0-oL%B~wIpt6bjv;xXJ9Y;St#Z`nlbVe7eRcph&$U-#YIm*=8*>Y`BJgEb)Eo%sVFD6G!Rj(~20A%upYhN*&&t 
z&mW0FB={n2iG*z!zRo%BMxsh%WvmVn#Bp$s1M^y?*~nSBy^+&DwB<0Paz`VUdRay+ z%hW(nw9SY9FFIvU`eS~D-a|Hz8u6_1|75kr6IG>Tacdf|w3n^&P45;9VjQ;`m%A z^%rckPZYzIM1lqSpgbf~w(Yq)T-66^#1cRQ2+Z%E*zU!^r&M|7PbBdJq0Jo1Tny z)#`S68kIun>IOnlOy>P{SaP6C(t0H*3;%@-T?mS1;B_T=%^y|zFZD2vFOqbRPG7Fm zXJNaZvzT;oklv(UeWqXCg917ypNXOce-~aR%G-!N1T)nw9PMK{ymZ8aYN6iEhP&kM zO%gQUJJ9)@EBvl=lG$~6h=DIyWG_2~N;w*m956VFWcO7R``nW_8F%G*MQu_!1mztD zjcatT@H5qotna|-BGA=1{p&T=$2R)qRo!HfxRkeZaG9Y(E;b({L+?P-Hcsz6)c>!t zK*;e`1}}|;)q5ai=qy`>uP5PXcBhI4tG3}^6SSF!XS;C%9B=FHYXo{={>4~$s0o%$ z-yVs}IiZPzy$1UU8`1Cw_jDol{ zCI94jn(}L4wpgjW#ds`NAPZc!!y{T_3>nKQdBcy9t4rx29WHa2#cv}%nqZbpE9@g; z(OTv7hSP?(a0afzgz%lcJWbvO(oYjh5>ac)&GU^!k`P!jvbiz-sQPiL{2=GZd{#?4 zv4+W^fkt3dp^+jnSZWJ+*wlx`s-ywC_XTy)f+AMc#1NELIG3UkDBh#d z02CELvEDsS7JIu_g(=+~lKFR4Am@Va{`3n(Sj(yh+74fO4mN_vGkW=57YO@3q2xs6 z<{U9S8fj}SKkh(~u_^S1r(uH$bL`C1AN@YMPmvKxLhhNDKZS-`{-WZ&4SI|6#S(_7 z4Gn4mW4xqZ&eJb^ap$tO!=U_Tx+*Wm7C8J1nfJ6PcWWhDI}s+ zEQLhk;%i-km02PdH#+TC{AAcxxe7uf+K4@km{83^53dGo@#Sm+*%|nuf>%%#8Vt_C zlw>O#5las?f()|iwoxtNMIs#72)oJ;+snDRJIQ5(0I!XJ0~1IO1VM>x zxtPoC7CEYW6rz6hFhuh4)r_@dKwkA8XcSZufy1e$9s_%Mi;fSmAz6vbvE7g%Aj?rb zQAxv4eF?#4y|-3qeUwqS?&ejBP3DL<_&V4m_>8bopv!OsgZbW6xbfh0#c&ttjLT)i zd?$HA!^m}(PHc;*V$Qvicr%HqI&sEu5{F7+4-)sSRP7ABpTzq}gns6j7gWt_^{Y;A z^Xh(HnRn{+>943FE5?y7TR@Bdc}a|>lD&_o@H>v06F$~9;fcD&yBS&sOJ7FPoO7k# z#;WL8995g@JL~t(U>4V7td`+u+B=V$#`kESRH(-%_>in};-5#UN0U%O?6|`}Aer6p zxhq1(_-YZl&OmPl{5w`i**m^ZBTS|G4j(&d6+Hlj=cpBamR{kjYOe6}GSns@vcex# zEBsNl!gr@;G#I_Yi*fz`)e1i#IoL^0>)3pjm8V3U@ZUwFV6v?!80aS=RJll%UVI~W#9v`J1XnDiafXCXH0QaSGm&m z8Lf_rL#=s^z3fdYqy}4PW5iM2Hx^2Y9F1o6tW&->m#-=I2Ju$oP|0=?>N3B+80y!{ zs>jkNUUGla9I?D%zhhO!bA37iG@SnNt#{&`NDJ*Qk)}Q8PzY^3M_M=LLL>RNA$Tn? zk{mST!M8qmll+|%aYFYBw4>JxWFbg}T#H=V5d~#3^TiY0ns^;?@J|+5 zGSNq8A=%5gjwj$+vwKI85OGAiYzRW_h|Y8dI#2Moo8s`bo1)N*^FNDXf)jQE+r#wu z9V{{GkL`E(noi;GA)-L1B=h4>`H|{x`k4Gpr>bP9@;`5(U8PcP!vFj*!t;0y|8rRU z&tdUDA8-cR(HO4haC~!xN%(tbAgP`p1G9*pxRo9%2I_)DQ8 z&p@K;MRkj647Xq!ZJJZ!TkB*ChlZt8f|1xwn^3D9mpHf_7HwdR#~x|J?fsh!90fayRzMZ%7^-r5 zv-k6!LZK4dLgo2x=VZPSg(XVLA^W5$%wVT&mv%6Sfq-qcvtZ#vLxUbQx5a_#LtX?L zcPN8E5{ar#5pFl1{Myn_W|2|qDkqi^q4bOSEGI(5(r-llDoU| z`?FlwW$NFL0OmjJuueR~d7NxG3ZU4vE)ezu{XTEek zxgu<6-A7nF7r6ZTr*uPZzF1o%f#dV}wY&&#QQPnZ(yIrS47Tr=<1O0<+efU<=HYjK z{K1bAhf+Dfgc1LHoJ-%cQV#zUUppqAdTe8C%HH0Jw#c}*o@LmGEGHFm9Y>M8dtfc^ zS0(dy!`@9A*D-1v?86)dx0+SHaxD{(-=X;5P{USu%G+8?-+6gE#`MqxM~t z4U|X@-&%)vV>e899qf}*VLONXDmzL4YBFM!vhazyk8lQYPX)a)MBask<{HjUsGZ0P zHE5~EtjKvKqRc#LhTo8gc@Fh|$$NDQEanw6T5;nNb`zy^vt98!GMv5!)W*uOC^&)a z4jsQz^9GL{KhSb3kR;DDQ6>E=pm9ss`zTNmsc&H;NgK||;j$n3Y<;I3r7WB8Z zAAWl5V3yGed+l!NQ2B-Mk*FezBHHX%ij0qos8|2uH#gu1AJjEmx!0p3m;ru#6 z3T-!#b4j7rF`YqlGdz>?*nA(kRG$zk+XEZvCE{oOD`RAq9Eq;nnjC5$E2`#Ko&4bY zK~8VlKjffFHTPp6GS^6qrA7XMBG3ual!j5SQD#9RHQQx7;w)Gr3S@hwN)Ep(c&qe& z`5!}k4z;B#nW2Wuo$}#0IeSeLYjg_p1CEy<0byy z86_AnKXC>uuiTtNAk4p(!U%!JH|)V6%Y99@4eU4R$wBr)RIo$JoMfo7+xPr?d(gft|Y=oZys}Ax5UiK`niH76fBbw-|H6D+mNUcpjlr;xAw^c* zky9kVCEyz2z>dP>{Rz&wga3vU0frKuGOS1+p>^|nv~NaEDTST)&c8$KHZ@Pt#wNAQ zj5o;19|_Rm&qw@$_Lh~f=*PrUwZ!qTEQnWUU+_(S%@J`Dyn$b?z?ecgtZ|eC!#ida zU*mQ8Jx2ce_z7Q#&mrd*r~fjn-xzu}dM;-fC05cbZV0yi^k49jc)Al#%+zK}bB)?r zce-WN@I|=SL^{PO&ny+U%~NYyv-YerF`q(kvRc@R^velf@bXMZDZYqh`c z^GbI9pYRHa7$*-ok12wLy`VD35?GzTc&aGb1b;9F5{VYbS$*IjA|N4nT(tJw(Bw#3sI-@V^tLRyG@(haz$GA9oh|Y!BaDSw zUUA?i_DYGi=qrM3Vy~1CRIje`ySE{>4!Db*-m$4k?ykHq0A19e=<@{YXUfzVGiq^_nbA9c@E(ukI!+b9PrD9QRAnwOcU_Y1t zJm*WotcB+qZvBnR3adQ7HGm#rA*LielCsv6CB zp6B&Nx(-TbiPIt0Zsm3wohOYt{2m)fx25qsO0+p&Hc;y*u@#RB?Y%$?v^Nnwkk{RK zO+QoaaQI8;rxr`vLrgzI4OGx$RhoDe(iY{As1*3X|Lo 
ztjjq3J=q72>`16(J^O@4B8ZTE4i!|O(#k(0O{xB6zaZhR^k1?z#?n)rfnLybiLqk4 z;cvcNg^9@~?@C|uwTADKwZYd&#b3@y;}DSP=Aj0$WnOZ;~s<+)-&&X>tD$jO+V5<7;Ert#dZqL^A&x%^7__vMg=m)m)n zzC!MQSo9Eg90kvVvuh}&M06(HEV?6j1jem(vuGc5QZ8z>bW(kp%aL^JUQw-+-mi7i zKEvX5Qgy$|_Nl0aN*~=G)kn80eRR9hN4GEMeN-Rao}iCz7u{0nqgXo<_0a>Ok2Xag z#o{m5xkUBR%cYFcM=#f9L?1m~Qy=9%(QmE$MC0^PBkn#I6vl^t(+VU07R*NZcvV-K z*Eg0C`B7Q-0*!AbN8J|(WYtzlBlorxvn!$YQ?1(VCBD_>uY47;#J4?8DJ{ww$XJ~C zIu{8DZz`aq5u`DeQBp8cdoAj3#rOrj+CGVGU$5Gho^fC9b%Y`u_Z|S>TLJQSpp7H z{n$(>jxjOw>*p-m&|zPYKI}@4V0dkWKs{>`=yM{`A_PKA-x8cfAu4+yt}?X=qrR7; z^`%Hw+U!z=vz^>ih+0&{Q$edAf20yiWGf-J(rSv1ADf&smF`3@&cb>vu5>7CE;u#T zFELenIi2rLEB#7>Mv}NIHo|-Zs;a$`X0yUYxP~A(NAeiHZR$XpDT;#Jp_Y;4xkTTk z=9V*rr$jSMmkdfc_zBvzEz~==ym>_J&ixly$gzF0kS95ZZ z=$ihNNc^EPhRaCTI8QBRLsaDVzvU(ie{<_R^_pL#Pl@NL#Xg0HuqD=u*m>&J3#G3c zkWd7~7I zLnEc*ha$Ys&*u-9_nxQzbZN_<>XaN{v`ur|OZ4ZA*7cL-WKf-O)mc2q4rWRUwPzc? zAxxeQSKK2sYy1WgWnjuq#ylWV>6>s5MGJ1>ff6oV;?N*f;(j@SuS?9A5+BBw_&8Ex zl(eyjkO|uG#M=0Wlo%6VqEwe4+l?R4LmyV^K!>l}s)8dVflh)Td!qAJLbj&1>E=h&- zn3>2s?T-qij3P)r8e=Z&DOLq`Q!@C~Z)n6WeM?=FJ(8qTUeZZzbkZFpU684hGIY|_ zBuyEkla6r;v+y$;k>uPp+o7D>qacO3jZ~7(C22OEW=Z;9C&`5~7n6(Yc+~aIJC4>* z3Z>@na6EPa)ySnmEUWSQ56G^O{I4qzDO)YrqIe;YsigN)K0y^aZ zJ{RC10xGyv!1d<=eibmARVq01d_Wb^$-&;20M-e(hA{Zx1_Ae63iwUHu*(2P1vD!F zI4%ba?F5+A8Sq*c!0fJowcP;w3IQiy0eId3 z1~j}1a7rJ*Syuyc`vL~_13YmJVBNKVrq=^*y#a8~jR4P0fRAnl9PJM{`!>MUw*zJf zcy;_;Ou(XO9QGD&S86cTE6XX*%Hg7XYni02;ptcuYXQmjGQ~2Auv1;ClhxUIjFI z4Nxv1<8{EJ0=5ch^#)*ufYbg7c59=Fh_J0Gqd<+;RV8kbY;{tr20`?0S_ZeWj0MF-ujRNld0`P-? z{F#8!0@et){Y$_O0ng0>wD}6~fq=We27E7|+ibu)0y5_SN(G!b7cf!49s&I;0BZ#F ztpv;#kUJ0Xlz`&`?ydrSFQD6ez&ipm7XV5Hgalky4X70Gr+|?Q0UrxEe-Yqq0c{oo zUJ{VE1n{YVZr=cw2)K4BV55Le-vXWyU|R+lE#RPl;okv%7GNv~JSE_Q6@bM8?pO)9 zbroQVfDYdSmJ7IdHQ<1N${zrauK`@P7O+>q`|AL2{|GpBJ>Y%;hXuHQ0(>Xno}U4$ z1;`b8!H|H5HUMlJ0gnqfZ4=-%0q6e;m?@z9X23E5H*NuJ5%9oofFlCRw*oS@0bUSr zwh8#RfX+d{LIHiZ1AZ1TBm~$eVCV0E;0{2}F2IKZF5eCC2>3`qNWj^90JjO4A>g#V zfZGN9EWp?Ys1oqOe!%m80JaF|b^tJ4z-j@n90Y_80VW;>tPs%d2wB6Y#cx z%>r8d33&Jz;Clh~2{<6Yh8`d6FJPg7zD)o-1PtM5PjH`rC-|IaFs~`#QvtVT0va^~ ztQIivG{8{-KX7Ux=r|p)RKPX?ea`^=EMO+?z+f93f5EW=PCpCqy?_VU`UbCT2`J76 zWVZsmE8y(bfF}hk6>vQf>%rDJfNurlwgtQ{VAk|BD7P42{789-cF+tt4(U&sPCI%>M_I?M;f=iSYvI&Y zzX)e~6fz5Wb@Z1Q9?c{wO7P&)T6=dcF*#^%`9;t*#P zJZ0~98V9@#DJ>KZa(+Fkr7Jv^kO#lMz06Cut#j13zoB;EuD}ou^zSkUy;o1jM1{90 z%$TWAs9otyCO$vp^tJf_7pu#aw)jj#4f(Q=%ic3YI@f2Jmw>N$Su;VNlOA#-t&I&t z_v+-XNpci`?~g!R>w45GbM^XqEOEG1YWJsgTz`7MA~dbtpCij^_vat4C-mp3cO(6I z@@JuRUHy5P4butwbH~rM`jgKl=|ufWiS!5F5N5mu1cfmg!CE(m+dlKM?C4XvYqW{X zC0OJ)5)S+%s96*iE^G`&;y8yH2+!aY(+2ZL#$MObhg!@Re&mr`>?%WV2tOpu2KY1P zPr-kCKz-sNW%;)nnX}-KrqqF_P%*wfI%$1lO4#*vGAcHCmm$wzulUXWdApXy_Rn)T zUr=RZ61h`~I{xN1F-#PIyrXnBvL600_@(**@5#Ihn^(x*Q+GukPo*1?Jd)v{*-B-o zl6v*@Fn2*k`5+=K*q^El^iK=hi`GhF|t-M^wi?E$^l|aUJZ`i&uP) zFitwuq{sJBl6<*T3yXI2`rrJPvFZ$Yj=dNmj~t&(IJy$vsOeVgMQ|qDYH@at%3nW~ z4=N~nwQC`?YFgBBhesXeb{703SFLV#__zo*<@k@WaW8>lu7b@Y47Fc&ly<%>$xVqYJM3X9czd3X=;SBum0Fje86$#$2#$CFVZ!2=+e%PEY z3?noy*i2I7FnAfM`4ymNf2tzTKPimHh(9)g+Jtu#qiDW^p|Tdmqwz^~(_4=3SC}!aitSL6VY~PdBq;y$zqV({N$Mgb}WNqqM0;5KB;oV^W&{#686ir z*e_@{7WTtRps}wq8*}S;&`DxseCno$^}W3q|8-fSsgeyn=xEG zx)v6krA1FSiv{}*o_YfQi{AIB`7bY0nc_dr)BF-@X4-xbk-5cQE*G)LG!~Aue;(x9 zWD*n?&yx+f&$Bj9?uM9w;<SSnfr*}g*i zMdm#y_vqa`O|2m!(E6yK^Gm)@tUERcl$bkT8bBX#n-&!Qx z1QBd7v-N95o0EkEe<%|CtVjR~JI!CX5;l$m-|=XXfU^aP1kLLtf$3Sryp;1&Xls3< z>e@mAV?@$7W7c&7ZN2R1jt)>ge3cxkhis^2#lCj62x_)C4EK`AN`0XyI(#SOpg91Q zKtko@_eFvny}Hs$liw{!3;jc~H&)&;$=8T9^&lCJ5w4)I2xtE5SxAo@}wbcNVYI$JwlYRGFhzf=M9d`R}8++sg}~MKA?WV?HEsi>2QxcZa&d zfX~WtxKS2^1yp5Ic!^C} 
zMnYvW|6siI@@j79rxLm%lzhL!=NxF4>-?@(L#>3bTKB8WDfq=h&&BsB^8GEWv?;6S zYec&7`&)ib#Z<1#*>*$p%*>_*H{%9f`U&3rq_rdPd7E%bUr z0^iPX<~(v}1$^Vm7*4qnoEnqO+$n~8aB3j)Gf%DblKce&9@)E?k1U5M??%?6Yy0(y zptGP#)1wH~X7d}=JUBB`9*k>_aET4(W6C{104Trv3orRo&X1t4m`pk#Nb8570i`1C zMg4~=a**2l>6YAQP|L1qYNb;;O7z|i#WZ;qZg)daUWYU1lND;PoUdG=mvf8_j1fo5 zu;>g&eZDPm(cNAUR^n=QCu2KzfGg+d|B~-$5t!{X}(tbOhhm=jlNc ze0!b{--pR1OdiFzc@*E)weh_y4&RgN#5Xe$->r;cO?+QlRwusuDr5M*{M3o)yO>;p z?}!L}-DK7D9ll)C7wZl5?Jo446o}FHW>AUJS8losn?vL^TzHa92G*kZ$yWO zUwcUgrVku&*yA-|c8aNKWC@)65ZiAv7;IyBQdP-#rETtJYpSp+KbfAH5-nNswblDv#hZsM9e!R1iu}hXkoGJIjhuV{&G=j zQ{5H$QbbC!g-a_GBpI0s_zIdWo+_esSCL%$@uL>6rso@2ZY}nq*r)$e7SB;s@wDA5 zcbwpUO!t$U1cSq{q zC-tXOf4^GwH>Uo5Qhz%2XD8NQx#R@(XS1BFAIeFdKuU_$MyvIs32h6c?3t~5&?ef0 zcbC>nB6HLAaw8t*uc|3f$2&j!X1*UQ4J3ru^1Wka&RWHGP2EfTPwJB;D--mdq@>b= zqhzJQQmetd$eVEH-O>UoOL@guWbZ*4nX^GcT{eO@)eJ7C+2xk~F0wwzv41PRTnuUF zTkU4kZY%23_2}h4PHL;Q;@H=^&t2j(p@%*J8mLd)iMEilSZgg*UXW440EK{vD$24@ zn|%eXucQ3F3`Z}qE{X~-(}GYzl${r-R{Cmvs8b^o-MT(#;c`)2mT=}>f=ZMkN}x0a z4G9%js=AsB=88oiu&@R{)rpTi4xdy15}!?9Y1$;>vmsCmpE7u%20o4x;-eJ8J`VZH z@P0+0u{0wDY6*cRQ@QkhYmx6rWT@h7N>^DJc_MkjnKw{EG#wbGq!_i|C>az+%TIO8 z7M+meu`!xVX2Z>PD4kt-=UHX_5w3(4kzvV5gwy@z{|qf30m zGBlKrI)=((6~?VMEH-gndUuxY5@$eC<~|sG5{b@t~Fg#Ww!nNdxDCjX88tp76BpM(DY)SrKmyLNx(QttnvKe_)0{aL!8W`D%I z9+gL#Rxd;z63N_@>KPXm*JUU4jvMzZvZ$2;TmPd9A8<9udsWwj7!~}|xjhnuxYyR5>l|*?| z1|QX&|AN5Zl1JlcB_bcc^8FP)SJ#QpB+AslXFr2d1D}8M@r}CiiIYcvD0#H=bB&P5 zql+wrcGfMA=8_dox0jV7YlM)JFP!=3V^LPWp=1y)jjeM0R~#cZftgHZ2cjm`G1n4N zk^PR-KL+u1ABSY4;wg^;B?a3o@w5=}#2Hb<)Ax0Vr*G$Br+5gxF;ZV0@=15`B&&-e zpRNbl2I!Tact^PU5`*-`dqmurO`aAtQ z4~&JUq<`Zk{ipsN{gF+KSK*v_5w9&+gY6{(3>-*T$hnmq{r#>NA_I%$LTGZ{9Xjp#C&Q>nJKRz zsUjLLkBAfl#F|f3+n#KGFXXpKc&w5e%r~vtC>v_F}mi6e`{+wg##&rtTtLmy;sDw(M;MH@4+UowEeNEbq~-o>TeuWEgv3D%A+WMrJ?1bVe6Sq7OJBF?9fIysoT_#U63kUJ9zeX87L1Hs!??<}d zo%M@w<``Wu-|nwok8c|6z^(2Xw_X`p@Xb`Y%IWqyL~)NB^n2 zR{uLl{q)~z_YyUl*;b62k0q39!1Lg-W9LgLIgBzDO_G&5&%fs-Qan$rJ6 z`fPn)!%s= zBqHa9akT6`6n38?tPmN6F}x;}Wnncc3Kjxu`-numiZktJRtaE867 zsV?yqooPl#tV9i6%HFIN`tD>@x~Fm`5(A>l?Wd2@WEQU20ZpUD!G9&voTC zGd!&3<-2S<}^l)1bF;1L3!cj=KFPeUOB1~A8Dsz2AAuZlm^H8hmaW3n6!3h9){$#-OumY(MWpfe()i1KZkoEA%7woXdz)abvR%Er^hl%O zySQcPiF2v9^zeP2##EFSt;jwHyL6YucXE2^I*0>yEb4r-9ZVi`<(#W1>`TP-vf4&B2Iy`75s|9o^M-3m=T-HXx3G*(Uel+bn$4 zxPD@fl1h)<;yzO{*guyXo?Stl3vrz;F5BWhxAgFBp0<3B=-ebvi^&ReGeKCyjob&vAF|aHIR#%orJ+WA!-4$cS@*zkoU^pDU4i!UT#j%K4CLIM_nJ z%H#WcngCM^Ai%Z5A;4@=(S!!&3uH8K+(5OVZlm<@5PSJyEax;QZhGdUdT`BkVpJ82RMafRuT8f9f1$iMTRLhMCG?oD0nUOpvHci{*)Tmb4D*x zmsOL!>^ibchFV-R*|hUedwCC@q2_sL!@tu2S@m@Fqtn}P+Oir|Jyn|E!v$o`>O z%ol&l2+d9MpTrdexg)dXhOzqHGq^O?cT%W@%9~VVFAnPW4{bzN;UM6Eny(k%zow~n z_j9RB-naJ0b~2C=;`p;1-eY069e8{u=~Pi!Q(jlG`^x9>glx4VG1us@&>vLctB1rWfmR&Gd!mggpecF!>*81lPUT4 zb4=CWo>S-9a$ihV-V*Y!Ojjpd0}G|KgiAnUyruRFNY{-Yt!{rncjbkHe%_EHSqcF zjTk=a{>kybb7%!Ichum{4nt1Z_mTU|ZjigZ z2g=>vLs%HP$oH&<+4COHDJ8Q#Z<@NY&Ym|T=N0w*hW_qU-g`L}>iN^0!Y4zp&u#aA(UE!tZb zz52dWlU{|S)TGyc+x$y@vF7~S{(zc)-I;%zX9%sBf80gR7E2hk_WYZr^21k6tobL$ zEPga!=Q{~CCSUu1S(9&?ntbn?ElCKy5zkQiJkEa;KW!$Oyw?2t;eN$WJ(+)V<-Ovk zE9)dubpE}j%EahoDH@OR`9&0s8j~;OZHt*E>&f?qx$iA#^@z`_~)N* z)a0M1NcnG^UxS{lgFb`riPEQ=y#G7&xk#0%MW3VGeO_Dsl)qk+J|jpG`qa?x=Rc!W zS=oQ((n4+h{v~pVe$R2RM0+J$ zqxso`ncreCmh(T@D|7W_O6EDY%dTD@8#t;)wkvj_yRcI_eOowLH@p!yo#r^Xc0_Ct zd`Or7wisOEJLnJdn`r*a$UiGrA+QaD=7a%MibXS3Sv2_;V=1qaw(egg*Jcb2+<9qB7K7%6(!*ZYi9vHuBtGFp_RMq% z-Q`wH?tXOG=Ugg*6HRW#1VUZh&BeDBDm+^prDMbAdRkxw%Esu}Aasq`JBxC10><*v!)YV2Xdg7I_+B|GIG?jfTz{nt(u&H9eY5ig_VP*eh{kN^ zdb(0oD;n#;C70FpRIl!n+C0=ouLe)8t3SQa8>By>E1ZF`HmMV0$V_5{9FtELi(*+H`ilj%wG^eN^5-!~A;Xd- 
z193HGNi0~Y7%#8VKhybHq>Yu?Ty{sLXEo!Vt2^kc`Q|N*y%;J}$iNp9y@zr~p2fA{ z4qrn|POXwW&Bd~p<2`<#y=*R4&(QTwZ_m_J_0GLT%spJOqpP|(QWf5TNL9^rRk1qw zf^j%>s;VQF$W1tGQ*l*y`Q^x;Ve=F!DzeWRox>$zbf^qGdDW5|jf#B7&HeqM1s^O) zN}6pM4(;VDbe1c>{ZnOGK^9f?7BGQNS_@rXBac`Bouq+&nsc}}%#sgnY~BRFLp)o3x!*I?>D!=9f%Zur zE(^z8=&avU%!PcaGxz{0Qu|P;9b7tEwGR~@3tqsJ<7=sXw#-CTCmnCMADSG$S#`-_ zFZw;wmU|k;Lz;7EV@hP2doFhPIWdZn^1g&tL-$ze1ns8%b%eyi|Hmg{`1huP_$F)O{|z8 z|5>&1{}G2!BL1QpWb5#AP5PD=>dM|m-5T{;_VfJ#jrDbPP<_UX1gyj62n>+Xdcv8m z2+EB$$)WXw@PslUN36E(1=H7Kt69W;qVVOTG%AK)oWKu&NW?z4Z{P{>E&2<5e}AC{ zzE#)+h zWb=N(#HuHp`O|;|`SjVdp!3)A=?Fp1f|SUoHz}=|rEdAOguEx3-@~6zpwTwdsW6L| zPd%x=Zu!&&mQy?!5r9$ z<$vA*aRU1~7ZB7ZhFehXge`9$@y>cL-K%bbJo8Kqf`rX`v2$XX*Q^rGYi&N@WMa)cBnzyFdH z{-I8wB_sWvIjf2sy4nv)QEF}5bw^uPSC^T%eQl_839zeEQ4)*D3_gl>jq zDtN8wG@x#l88odHojPLDjH6Q%RU6vcNr4gS<^$mu1WOH*@ZKjC>Fnj|nkU6c0rO`P zISL%TlcSb>m{Wo1nEyj=y*O6NPGqI`%W6|C?}jMVCiD6)sdoNTwWwAPgJ&Go)?aT4 zX+*oY|>A@5)l zuA29ZHwTay97#enJ}+jT)AWq)ApR%Z%5nTBcDp!HcK(DqsrbybQ7Seh$6r$MovF2` zI8MG3nLx#2Rbg!^df>3SsCfPpbx<*jL`B88{bOW3P#WX^=K8p~G=3(Fei_bu;2K57 zAtL{Ht!MJRbugo)*oVzKVH6bm>!#G8=|S_n?bfu`RX0^ti!gNhyTO^}d6cX@t$%ym zqT^u2jk2tkn#)O4hrepbL-9@vH~&}YUr&uw^i=wnm*0P-f1UG0Epq*73c18q9?sm| zx2FEJ%&*82)xYNO{9mqrdBwjI}7%?V(zUfZ2q*Io@(TCT6xpMCF*CNrPtqCMr(MM{F(>E5uCAFv6 z1fMm<;`NRDNUW`I{5P$i7mwETtFeB5e^pfSHzvnlO8$=~*C63Gv-~$9Vf^~(RTb8j z{Exz0bxHn(F8*6{rnzlNGhZtZu-H9k-C33UDh#3SYMl}o6-%NNMv z58+xD+4IWf!W4U6puPI@v|LeT&wH_t`t#3$>d*Vb)E`f&J#WlZ!*^c=-(RdS0_D3g zvKzkQ|Z9f?8bhr93DbdX7pr>K8AUP~VaYmtSttnEMDc=L~Q;7fL7b z!Ad_Bs9@icMLNSvkqmMXCIu$*jQzk~R!=mY{!8mA9VN%E%^EOOY3m--g@8~=_{oxiet@z_Bw zzSkrg_OehK>#OgVg1uu}2RdKmJ6aiPX*;^&s4?iMQMoPoih|^ZBQ|lTpYO+ly`#zH zzA)4s=LRR2i3iQE2xr>B(pRO%Rz@Yx-tAk)g@DbBLGradIp&PT+D(%M$z9zKw+(~5 z!h~~{9d02x)Ff_w@*OT%GNx6ac_ZJ3%3UpO8&-skK^x@j)CI{GkNB0Cb>ESKC8Ia+ zHMw$e)u=Y)o5!@#Aw#v1Z!dq0H~ymhWbdJdBggT582kbwN6Oc@p=JoJ+Xytz@E6rf z_EmZ}HS{j7=RI8C@ZB=g)7n?bmxPOaTPt^FjNcmWh6m$Ns=fOjRu}V=N%fNa-IKi= z8#+F-c@N=U_)gjnr2H@_C8?+&+g?7GXXOu&>}6yR%6*gM;Dhc-eyXZBoRR5Lf9an> zL@f5pxuCSjcjwz=8E(jD9*RF=C@amzZ4ZTQyxCM4Y(Y%5?P1;xN}tTW9tx16$H9RImi>^E{=Ph3=QkQ0S!jws#c zM5=Q7TSS|ACa#%#)Vl$ELEXaP;kcJ2`XyhewO;m*d)Z#U%#Ob77WeX0{SwF8dg5s5 zPV3l9%&1&vJ0Q3VvRc163tI}NN&VE>-;*EGsF0&tks<0h>`v}K}?$YRx;{lP60dJOsX`q zlr2>eDMIV9yi$f_A$T?qW+oNH6TEF)<%bzLr$3J1sc|oV;$?|{Y?jmCQYa@^dYfip z470CQ;agKVE+Zr6*#NU0raS4z7o|q{WvEXI@8jzACxotTBB~bMlEW^AskgO0hOE+ss!e3LoI7v+hg0wG{kc zO5g__pK?vV628Sn1c$nkNnNS&0%6KRBDqpyS5KOgE4S3C_&p_ZFS+u@>6;9Pl2r&- zl$a-+`Gu5QAg2z~IsKl>4#^yfu+Ya4Q{f9n$vgs4{OYrZBzE+#QhY1iC z428be(ozj7Mf$-S?Fcn$itu0`KM!91S6;jw?8zMU-W zg}yKM_)GexTe#Pv?_E+Zk-npA(RVt79#7w!1j#6U^?dk%c;nc7$f8=L@zW$m=R>lj z>52a@ajz@+Vwb9F$QLEA1|-zCyjay&9ADqplBVlhjHh4MH+M|!`t;=ojjal;^EZcDWUeB|=$3#~Sb;`4+vKc$om?SaVD>1yWV(U*kS z)e&={KQYUC^^p7Z%rT>7aAV!o@n^O0oAW?ya`*jf{HBmqP3}ben%Bn92GJAoYgr4w zpC7FYKQ+B$@<-F~3=2o`?|t4qMnhZ|wItB?`!$lq5hSfl;mpyLtT9D`c)}A!&`1U~ zeu`WsSbnP3v%2J?#_ti@6c!TvTKot6t~?=rTdt^u-G`{p zSY&YFl5f6!!EYHby|z70{pMZdSDM3Eo?^@){(Nix$CZC6q5L&c{+vkp9w#V&e_Z(i z3FVth`E3td?Vo&t@)yLFZ;?=bi-YoSMauucQzHJ{Z_BJxlP)+^4~SfT@EIA^oKurS zFH;M6FNC>3y$EMMNgmNDP9xsuR0#L;wQR}SBTCG>7=P=NvCOGFS`U3=>0@#2Jsj6w zNgeH7Yqi(jYA=sGoJ{4D;b?pE@z#`kqb>dcg<~y#F^U!!Ykew^(%7mejeh-B*dQsq zG1!36O093j;S;=9Gr?KZypG?}*S7pliMBF@R;c7{DoG%Y*+UxqUicxdzUOs)>f(o*pJ2 zh&F9jo9*M{ zvgmC(s~I4!#z<8O=0g3Hp`X6gPg(luU7l1=s5PAVb5~8C2#*K)3eG|zFZuE-^eMBR zU`%_Ssi6f4@ydqNw-n?Z5_2M#u}->MQ>+cx`>7deODrc1aT`BeZA4Wr-Ir^~E(B~v z(ezOmNvtoOoJ~2iO$T%^9kUlY&0OG-{-tak8O`PSQH30u>lr(22lLM$7|*wPmVwqN zZ1qMv0(b-G`{++``IsbkKPM-C5XGFRbK(ihma{+Ea?xt8!=B+1$epaY_9pz4)Q&0@ 
zNNPt(?KpVw;5!U|a$D-Bf#+r_r9alaaOU(bnBjcYs!mIc4MqA{3|X$U@x1% z5BZKl8J{8(qP%)Fd+|=~?d1!3o+AX1OFgO>Gr1g-T~FBE#OasQiI*%8rP_R3UWvn1 z->(w?MGfwsBtqQqw_7V?@bbkGJ$RQnLU=$v5z(o6&hT}i0kW02G1}U-CvNTXNNetM z`Tj{l{oUf~KQ*ELB6ZbD9K%HRyMM`l4eyHa->IGdBmaFrIL3dEd!n4>R{Zz+Me+Ps zJVNo`;8?B*|J|%OvqMoVGPs{uA<_+RtAj3Nw)iKSV!&5V%8go+x{- z7Fa8j7+SgOMeE%WJw zj(iob?qFXL{8&&3yCMEgu7%q-NMk1V3)&-?aV5YJW%N*M%2KijLY+i#zI$PWCL7Fg zG^#L@{Nc>y9WAsP$xi>EIZ)xG>Zvl@2sU%#&a?C>;Y(n?Wxmx>A`P)nX$vZkQB0U3K<+ZuX9bm;+Q?e`kKS!+hZ*7`DLni>PsxL)hMoE5xI%HLUHa3+z_9M{Rky> zD52of;sQMw@_R1?*46}lJ@m)-L-4sVc?i9iMs7%49o=c}zfEJ62hDR8R(hui(e-yT z{y+Aao&6JXE=%(nNGA9&LW9W1Yy3qZq(p5w* zg)lLcJB8$y@Ej*p$|Z4r@3q$6&$FL%&WygFKHuN>_j`Rmf0Q}TUVE+gT6^vL+Iv&S zY$(eP@dYvwyz~5_&WAft{2>l$Ac8&keReR6)t#K+uU13qd|X)zt#b&{H>O7#?~T#< zrX0GkzIz%%$+OdNW(gBwgyH#dds1pPu7tV8k2{*0x6~>}ttjkHGe$oTUC0Or@FgpB z9!?+Rw#_LRkb*s*L)J0$^|AkMZM4ZlBrvy(dE%}zv6mwj>}c@A?poJ?#F|+ z_E`n@rC|LN1!BEt^V?w!*ZfG(1Nh+&Aw%9$UQAr?2WsGlQ=RhTeiU?^xF1EHBZ*#< zmP!Q~sHldzhpbk}&ULtn;asM*P=8NXo@uu@<#|hc?&5get;F(--0YNRb7V?Xp1o+t zET%lKyf?l)^>vdkgC|XIamk2WH`y=RXj3}?D)0r-gUunwF+H^qs_6y zst<2bs}mR%@4g4JR_pbM3~pZ4#I2PQMfax-CvJzR{rRoCZd$26u-}$ef-iI^qQ1*0 z2iEG2V zYNFdA-0ws%OPTL3u^ms|Nj$8Fg>~Sa$h5fn@0t&`SIB8T_wI7%SY8Ed4Y>e}t_Mmuw=ixXSYYb${C56B{C8J>t8wJyXr zTAZWnT22iBz&Vb|obiM#5Zgng&I%S)sTJod%VB(|pVP9dRH~r!j{cU5NEvZ&BgcrJ zqu6P(h6l;^S*uh6{rx5ad?VE4rRL5m$b zJe(2nItpRD)u##Bq%$)GGwVWP8hR94qS4%x(BM?7DSx=99yLj8VN|Q5jrNdrCK4l` zV+=-Jq_oH@?%*Qy<~Kl{54Xnu40%p}Nk_aggKIDZ%Ai$=Z*}|m8v+KXgqy&WW6rbN z&;8h+M^Bx}ok&fmLyqo5AKnu_HXHe<+#d*@iNO!IxlW#_?;^0`{{3PaRMg583m>xX zNfXo43Fk}TeDv1P5lY83%hc{)Zl@o$&Oti+sv*A+L!)Zex3?o&0e?iEaC9~M2jdBZ zDoQ`F!@3P3siyp-8DJg)c1%tx z@Dfp|ZASXh3~yJ&abf(Ay!I*@HE+)T@d!vGqpSUsvxyPz|5g~zhF?T|<(d1U+=gnO zr#^Ww?;CCE&wd#7?QKB-76rC(L)xcMojKkM!U&iXz++lumB$Pp|Ej5&TZ?31=dTne0aG{jbf*S@DcocXRy@1mu}8WFMcL>%`86njkansNf- zlaxrUOtPa4cOhs`r`-y5);j9w_LMe+F&|pZyVleg_%(>1t`SXXKJ;EQ2Ek(ijGxgQ z(9Fg?2xep-A`>CI1;}EkqrTg~MTRbCtHv>ET?z-re4`?Ew8>#SRwi!_)83r!78-&? 
z4`7^`=C)g)(2I^jYCcWd+so`Hk<|W|(u9^A_BJI=r02qXlV*0h{bU99_YwuJiy?~o zK0?k|^iQHhUFgU1rm>0s4k~nXYPyYXM7ZD5<3oL)1*Srnk}k!;;o!G;u}dz0ybV}% zd)xuiQPuD?S8zX$4d(={99}6l+C$*SlH~QYUYkmPVLPWgCR(`rUFkF;t3T%MQENSX zA)Xf|XsNyZp}uLUy`sLNCNNXpS2)y;@$lB%bGW=vrq*QS#oB71qN?E@i;lLt$IK6b zMn=U3x z?~iCwNgYxnWP}&e+&MM(4nMl3xUAUkXdGXjb5S_1@|;84-&h;Qvm?>mGuS1qXFPMEdY{A~x7o=33 zE~7$l22YglCdH3E1t|5GtIs8%3H!@AAR~}5{pG!oOZ{a4~>k3o;!89$OnqxqcC;6=LC{y+P9?MssdqBSWO!B8oZd-mm=&#=2 z%ZC_h+^Xp3^~Mox9zCxIBlK4wTIkgK6^?BSIU+f@gP!fPQhY3YsfDYDe@ z71iGG_(AV+dz5?{Khy$q?i0-M!_FMBgKTmmCF=XNA(a)c(4av$FckA$>gD);b-$ju zbO!(|?V0PlauWn!7jN~3U*YUJwI`J_LJwmUbOgQLCHR^X8jwoUk9t|5hcO2@f{QWv z;iP?5eLFQ8I3v;v9!X0Qm*<4;uSrAduC!{$FTOb{576(U)k(`vKY{1!0d%{y8O4*G zeqeBFR#SMKy!QrHfejdj)gwrCARDuiBRKTvTHy-6hMewCvGZA(UvU46Dw$!#zhyh^ z-JAxtrGG?iitT;x1u6OdgVd7I`qQ)R^=d|408nM5a^k&2a6=D;o2}Y)$FL=@u%lyu ziT1#KHwpwhzRN>xtvhKfmRHyXx<>=s(MRbvq0lP0o+f`s0#9L3Vf%}-p7q`3q6DvJ z)rS%>>sg;7osf4{30Thp6`X}MfoTs6tqpa0-bm{XrD>6XUf0D6!9lAU*DE#cEe8~l z`3d?*neTEVU{!%a*#+3bmJt%xe^|qNWEY#}iND-H6!*wRHqEbg-~((>PqiZG(5#j6 zsfqn5#hNFdT1h|mA^nnZe8r5=1>&fa@i99T;I5{27XF~mK5^8e)=-to?_1=AMxe_+ zrPKN%joiB{`EF-i@RP!eB{24ASO6i#p^%8prJ@D3_$uiA|Tlxv|)}K*obU77O zkifTc-Ymqi!&T`=3Jx2Pp>g}usnij;F-I;{QBN0UCF2sBdr~8f9PdDXhkXtNknB9> zFKAI3{*dZVF3e8@6kYP>?DSuTOo18IOBzoRLBXTyC5_>l{8;6O zrXW~-@h`cr1o0N;A?r3ti4?fxJL_*48|YT)fh=(|T7P@_9J{V0Sbxg^B{^KKsuZ%& zHV}E#m<61~eF$eS=qHwZG{0CX>edD|rYYZ;mz>ksNP35L{` zp8MLrdI+KnH%-)cKJxJ!Cs=7Kl!ZZrKS!I7B6@w^ z7@Ir;Y1HM>cbwKa2%C8Ya}CS`fefuoJ~q z%=*L0O9hl#f4Gzw6?k^zD-~5^9)z@3+?g>__nc3yGLo#Z%ov9Tt{_a*<$HuSurU&L zjKT47eqI7EaLe0Xhvf}c@@T!oy1I(9HgPTl;9H+)qgkM|_FWAt%X?=k{Q>#R7x1`0A_Cp#(XJ5?LyPV1r46w?5`8<1c`fXB1ZFqZeu8Io$X!Y4hR z`28xVjQeDizBx*)DzDk#hZ1S4M2Ws{(#IHmw{Zc{`Hm@I$of7aWPelNjolr6{gg9K z-)W$T)%R>r#^~D%NMiKuf=^oCGD=@(y%KX0+^0psQ#>uXTmhl?4U5jAY9ygka+?-v zl3pa>Dqc%Kc}LS*5L875!CZ4&dXmzc1j^&jN%%8zHU9X>`Ib-LF2=VvoNo_agXXgt zzCGuByOq9G$G36(&6~e~`tkzL9W8YI7zp1o@I`&q0T?K}qpL6+Tr^0p!XQt5VM~La zQuvdrS9q`RtLUWd*Q~=|OWJXKfGU)4buXO0qG}nQCTV+|wApsrGm`eLq+Nr2vzse< zkzqKByBrA_=`GT*Us>Rp!&JDK56zt(d0Wqr1BEWi4&9SNgn=8ff`8#uh4l!&hbvLT zh+|&D{tVrl;t`Gl&t3Adrp?KZ99Uqd%$@P4ZlZJn;$E!3!gQ;lb?6F+q}P-#!LKk~ zN;uJouT1aCt{I>h&8GO9oUkR8HSd#@uH-Z)n|cy-&kHrlZRGkVM3Cay!to*?usX6+ zzxQMA&uq$YoWDOZ!^koFGc!OzaxajisIL=|p%Usnui+IP7(D^Qz2{YEoe2byB~F_u z@x(VN!Lr`?OAYWW@MJ1kO{sDFfxbb`d1}3}njy!!lBzmY`e`|EL{Lu?>cINAHct1a zG)?4C6j_AYhUii1v}k{27&4<5#I8qhKgL-0D~nvwrh^a$@Q&We4wj2SSuh&hAN6%Z zF1!>*OG_y<0IZx9tdqumL3=BifQTCspIe-_^iMUy0*z zCl~W^xaS_h6@c3+>f>-Hf1rCuPejht;{h5WdS_SgE?ige(9yh8o`(+R{aG(JJnDG! 
zT+gWEEdynU0Ov(lUV$ANeSVAS59H=CEClUpAdy~yv)JWT0RP%!eK znXPc4byGHcy*hk7ZaflWzgI7D?3Yo!1p5sFVM6;=K(6BL=K;Es?YCeN+wYoTF8eLm z`Y+mVTuzMrrkqi-{rbc8;_P?!<$u$D{VsOwSKe2G{aS%Aq5U>gE6ILKQII9uZ^V~u zziLBW_8am2zi7Yg>=^s?FJH3#8UkUQ{kCWQP5agD?AUKz)e`LYOQ&M&_cC%7?|=Cy z$dc{Xc_G_x8wX+V9NEW9-*3rDXfs8ot96)5eH#{FC)aa{MQ+dZb7o=#4X_i~ZPmwIuv(s^y2`~BKp4G*BOHq^36OefJ zfJMzhs|v707P#UiH!=!Z1n;L%)B}q8i~_3$*juN~3%wu%F5CpSRB#4U-zSHg&>LAl z!dD0mjS-GCq`saoti)Gdf>6v5Us9P0x`;A2pm{{yEP7wJhClD3WST+Q_>Wo>Ij{S{ z!|Wc=3A2(XBhT{s1?wZ{4RIhzwwBbwe(*xEET1=oPz^(EE!iVDR_b~XbG@cz0>V~T zk14G3tk?m>onSPAF~o5gQY%`~R&+PM#z=_zwx|AGUEk*H$81oA)@_%vrg9%wWV{|P zNIa?IKT{AxNP-RB`-=%B>pkIKJ~;CVXcx z%}{T*HJ4$xnj!_dcWwxKTvbZp02FRUl%vOa0dH&#FdL8rywrDw)WB>>?aq!Vy` zKJHD>kPdqN|HqtPHJR5MB5wpcDKrk1U@vGRBE6#@ zhv})zIfN!98C|1#9^nTmu3QfVBGmxO-Dd+uM_e>w&8Q5!&@IG8!ZzwVT7itnomfSx zn7sJ)K_xaHN({dBagwm6zaIx{CwPLov`aGB^St>Flbb(juK%^ZvzI%G8grdGxpwN0;b#T^ulhUoyej}T9Q(M7ykxq~LtTVXwlbvf&%UC5VeMgqJE8LK)(u07K z9aK#m?^{~u0Ze3}Q{8EQh>F(+6Mz)*DcX;E8tk<&yPo${Fq-NV?06~~ zNTQs#l=@+mXuxTDRaJ&7)A z?tjS2g4`_Z6453_LAwJ)Wwpn*)>z?1LME1KI&J0!(R1*X=kMv;0*IAR5!YJHLTgPY z9-?^j>qA+h7uu(G;5C@+~89Ta3S)0mvnB8>lH*jn)2wP7&E??FxDH%b!zSDj_UOaMhI@;#)p*7p zV8fqrL=XOq%_uqux4(ts&sRXCAaGAPWown&mpLG3f1XVahDF**i=bkLchb^~dP_4# zolFk2Xng?Z%WQqG23EJeT^#hAd5I7xUhv5Pe4o`ooC!19QIJ3lRw{8|y9lbN@7>aD z8re5YgEYnzK@&O(8Af<)|JVlfnD zqxS*XAn$^T4YHICa)6eh4_U*HlGbzq*CDGX(AaqqS5k@{5}JEQ=OYgs(wjdVe=MYX zx@nd49LiAOsn>!oGdt2fzhiE>912ULjrM3x=}vg!YIs`8Wd-d{^10<-P~^z0yMKfeS!^WuE*tuu(Vz|8w6r zFsGRqh6eM^p^X%dss?P7r$Iq=0Ff`WHZcl{fyla)oRdp3>iaD@(G3J?pwJrwIHs_t z10mTOSP)t6loiznr#*+E8XI_~fNeaUWQC`@8_#Swg2r<Co7GN_%#Wz#EZI91Gg}iwBwrN;N)z7+j(F27D!84PhIM ztLP)J$W;-#C?ACoj64git`NX<=|UISjJ;sJe_7z>f41-+?BZWOA^+N%|Bvky|6NfW zC5JCCe{FAj%G6LvB71)cN*SEH?EP0`QR^zVWoMu?sRWz`=XHFs^GAJyz>b=6#{fvQ z9sv;#y~)a*2_&$6fJ=nz^{`%il2c9T@*tc>{DiM@VtjoQ<8-q{nFCRG9RsNH~iOL^eo$`O2KE{{7ut)h5Yx0vDgigwb<~zoCI$z15`A$0M z^r+eHwQo@(JG;xq2Pv91>WiLCr0B1(y`BSo)<*PP7+K+XdxH7JAB{wB!ZQP0wG122 zS`d&4V({ccmZpvRh9tyutAVFc@px#$-%~G-+@bK`D21mfxWb^kJYq(QbXJi1`}?27 zO{S$=A8EbbDsu-0aTtULi)cuG@GtD<%9?H?I$2XMtN$VY;ytCojzX-8q^M}D8{BWb zXt5XJ3B)f4(h$fsjJUToI)=%P4b^;F_>PXhtNcL3qS)CdPhG--5QQ%z=x`# z6G}zL#T#e&YRF;PbE-c0M|Stc#uY?7|&;gT0o^k zR<&PH;Hsas$k^QPZ4Y1+<NOnwcbCLp$=C>-;6kN1!^!VYUUrx~|G{&vtwTqcPKXH~o9-x`VUOCrbF z@lBg^okZ2Qhx0+B{&q!#>h%`;e5sgYd;@V43Vwg+Pj(adAsXeFkT2NiW{{G(+lj9V z4ffS37mTG3yZnw|0QX)xpC4Ut9^BQK(7VSgCTEYsl9^)Dxl~WQ)|I&%|uNSBP&xZcu zm#lx`u{iywm$&tI;ACL*));sa=nq+1{}JF8r@z9Y^}j(R8v0+t0$G1p*wDX4G5RZs zq<>X#ld@DL$m7*rh@>R%Gs-_==uhRS%74SrIQ^3x{T(>0Kky{bAF{Om1>hE^zrv#R zze^+<`cFt9gR}myu%UnRV)R!MN&kl6mOy_*K*s35-_W1RQR%Qs3LY`Cz1as8^qf%NofiWrK#0#nM)5qXPx~TE<_CJslaqLP;9puT~vkNFq@VmzU@ANqk% zl?yq-*$UHa+PoYEbGYYk0}VVge^2C}873ayF{rJiku?sHC6#{;Mt*0oPNpYpt3YAk z8Q(-~G&{0f<5By$IJ_ZU*M{0+BOb47Os8ZCT5Z681QIe*e=-t-8naJKK%j2apeR$+ zxBD+Cqu8p8hE49o}{^h;8f{NF*PWtSRcRfXB$-d zO7v%w0L4)rErkXVqlCa&kp}pA3_p=VyKuWyO70oKWwf-_9*Wot+_HD{TO;T;%4N?U zK&9>Z#jlC%SzqC{?{|C-WKK0GqpHC&l$lWt#*(O2G3(<%xe39HT3)Y&@@9a7%FmY9 zBB8u%-STP_BX6J~FY-$w{W4Q}%S$M49VlG- z^>xY9_I%ha?}B3FjWy)ePAKozSb4u6N~B*|x4fc;wtmihTTVe&Gek}YFU*zfa1uaso2#{iu%TY(!9Te>`HCfcodwzzeC&l5q#6zSr-!!*Zm!+W$9oRO&DK01fsS5 zYcn_+Tbu3GRSFypa(LMJ`$4gX-c)ZDEyIybxc>`AH_Ciw13+03GyXT^%><+P_Hmal zro6`~dG7mB$GBznAXy3O&-37>^}9Twya!|Dr6rVC-7RlZeMg@3SI+K4LnV{?s~rcl zj_&m@gho<{E2AO}_ho~WI;84UnRsVeO}Sr2k=n~gH<4gtMa8~v!jV4(%t>dGKQOWU z;R)n-Do%cFQ~vgPaq{i`JhWGhm2B$&4*#rmH~lGDiMHFTl^}E44_%+%MS&W=G?2t5 zDBn3%o$?JOl=n0!TOmZ2OC{l*p}Z&MXV zzc0WjzP`)=g-gHZ63V;QEw4r~@&+36dL@*X5i74nLV0^pV1|BE&rWR5qm>=~BKs5R 
zw+<98{k{TOyuUo`mUlrh^2QqS@)F9sHCA5Vg!0O|fM_T*E?8AvTDZSMhwr@gtNh~?7BNtlF*M>bYI>!1bjW0#se#8ipaxQrlMc&s) zBeg`{v3&{TJ^Q^QZ(xkP#*RE$UmYp3I6mlp#apefg12?_EWN&pU+(qQnE0vo3N)?X zKL}zSC1tPQrz-AkbV}5h1v*S5Bs_>li<`Fybo%=v6ouYI7z7xRBDZ|OzYt-BuodH< zhQG-+g8%ouz<+_pzZbvU`D5@0gb~L#Ee5ehY2)vyxEuIi1f2_iO#}bWFtx@%y;eMa z*L>nALJCO@I`p;=f0G)1dlI{Nvs0pzz?R&Y=ceze1hUs~B*69+fhnf8Tdg4;%mOpb z1zGVowHG!-$yVGNaL?~of`m-d(N4pAQ6NPjltn>d-%pK|uG~q$ z1|e=fYV`*cd)#y`5UY4?bG$M{$)@qj?w{f;2pz9dqAwBSMt$>%jMrlHnEX$enTtjR zSRGZ3MR*t4U#(8#qL4Ca*6TI+o{%zWk?FQQOg#I}5L*zQJ_+$G1_hbI#?#KmLt`hP zwC=X?sL_*n6Q|SJd9OBLGvbw+JJtUH0l@NpLTX5C9zP;tlGk}UGRUpP!H|8^&=|~_#7`j=<+h~q#Jma5~+!&rotol$-FC^aG#7SCANQxwNTri zwlQ#@3_j!HVRfs^gFbz$3yKJz8BCF_eQ_U?aVAE86 zmrFCsnfFx}zUgHcXvkb`+%#ns&P^I87&NhJ0ws7ik=*nN|FyH$RpZa?D9J7_0#Hu@ zD0h9d(fnPITHq6{mU})q@guS>|ZABdb(5xb!9NPb6G`JIxYBs ziNt>qqW#Dbi2v=v|9yNSeHul5SL2JgHw28Z1@wXdarNuT%-z$q-hPpi)|gYmJ3Z{Y5Ld|-_Y(>ad~g;CSzxX@)>~DT zYkzI(WB60JtmEIWf~U)$mLriA6n`2?$u!*1<)pm$en=HBhFs@T6lhWLNQ-EJnWmS6?daPdPokXQ#( z0sTkQUpagcmlsM5Jx+>)O)JHm4l7qfHa=U9(1!2Da(o3ETd%Z|*VCrJdi*wyHo(&I z$Y%EO%uXpr#YFp7_$sLq%F>48m zjG|7@q_Xwnny4B4!F(U?b^b#47KP5oeM+V*oSWRrN@wAYq^1XPqZfiu9ss>0 z{#OH6A<0QL?|xbsB}KQuvgYjX#ftERWk!3T=6^{ErVaqCtTb;DZCv~>7`g7qez z$psMj*tpL=4&R2ThwBF6JG3PszK=jc)^YHS`NqL_qZ{9c%%d25=eh9xQXw9nioY^K z>e?uPbe`#2FxLzGs@u>zTICeTbe)cS0zs`#Zi=a}06dazw?hF~Q&3!XDu%Kd0yMYQ@_lm03o^Xa3I23tRkH2*P5~I&5#Nts4N%|z8 zMxR$f>(*z;3P+#Ify374CY3Gu922vIJP;C0P)pd?*-+hz0Naz~fP;1EV$#2tQD0^+O4oplL_M^#dQ-;K;w?U9Z5 zJ4E!#7S>e?EuFWR!~)b7R*Z)?QdkEV8(DXNULL~fN^$QL&Hdv~(l8#f7D@5**Olt) z4(kJarG|rOXY*HD=XefZV?raZNmT3n(WSv40*mvjSwnFQGe5gm4YNgPJALIjWw~Xda zp%33ssl6Dp*FL5!4uV`r=Nn{wP7WV#J}&Ch`J#3kRW(81A^WDYkxIoSCl8~(Dxip4 zg&na}gWX|~zo0`T28C8NNW^47$NNzHAJs@TK3=_n4ctqKaN>Ph8vK$;H6AO!4svUW z2S2gg79a0pAMq)tq|kVOFp1!vT9vHCzac7)l;XqJR0DqBrH2vEz+EIm)Yk!-$;WCU zwt+k3NjQ>Akqk$zdq8F^rMFP1X3=q6)DPGC;}A%L)`4^g8mJ_9qv9vYkJyrHf+a~q z=*JGLxh*M~0EIlCu8&j1nq5Y0G1t9(OZ8JPcb`D~8(G{7rfdPc&+`DWD)9V(Br?Le zNQ(My#78;KS<$}1iu^_0P>izlLt12mjNf#8^f!Ef2$K*xKr}(gqVaWAhKsQz9qtNl zF{;&0P~NIU(`LLTiTZY|^CYSFA`amYGGd^&;`RcW7E;(N--M>u{qP~Y7go1slm;wg zGoI~F{*C+e?fc$;M~oYpfm~t~#E>=IH|nnj1zDq>orL$(9Y8zCpThwX*o6dcF&jiW ze{CFRAFG7n_D)Z2u%Hz1N=$phP^i3_RPS$=t{%_UFb1p2(^=`;;jO&rEoNw$CTXTfBXO zpw#xsDG|@}zd82lACD&&$;H~Ip@FAyiFl5qj5MAFr%JLDLnQJPRq> zKCdHh@%FjPz|*osJVPK**RNjjc&;f4PiX^BjS}%}K^bZLyn3=E`|SJ0!Sl^GarU8D zySVbgJ(k=)4~7(Np9#ntZy!yWW#Fk;BA)9EJl*2)v?~eEZdhL9DOVz%d7#wx8Sz(1 z_9;S)nl7&|zK*lc+enVHkM6&E8F(H;-gx_H%BBXM11CztbAf@UZ9JZ8CE+QA<+Xi| zt|$r5i=fo@=~p72sRteVdd~mn-k*Elwp*S z#`E#TwS5YXm1Lhy5yw8I5XA6^=V=Q$+D*+V2Kz?Xw z!xCQcL;mDIx}D!==1;*7I@OIW34U*{3OLb&9A{VuVQ#)0Bxb4a-NUT5H~$u_afKV> ze|o)5tyl1pfLd?eg;*{s6ZipxdMG;Qw- zMA>vtXViM|Q_SsDbW|q!1?$aE?Cf34hih2rwzkssK!g=U+Lq)+L7Yg!`j6s5e@9W8 z{OCx`a^9TZl)4W>;i7k#GlmFc&D~yt00e3>R{);wSM|;ngl#0^PVrj6qwD zerB@+?{>*E-&w&hOw!T5BNVF`!E4gWU>WBnwyB9%S$i*AlW&FX#QQv|^_KMIB z>ll@8sPhbb^M|_O@o-!UL$AQq8&K3et#uNvzwS#Pwp-DUh!F3KX9ox2E*K`;ZoU5* zzPj|l{YSqhCxyGQ^~(7}S7HycD7uUGd;B5#4Q}c;E$`(0jL0~O;F$W87)xzg!ZJ5S z8#N$U1)i};qsHz!AT5^|X>xLQ0bd|~#Oke)`a?rga5F)p#_%0ljobJoDGB7X#dv6z zMvkD?CBMKK-qQPbd>;s1NncfAqW!zqky&;EUFY|HMzj_|yTDG1`f^DQ_BLx^NjE#a zW1tFqCDq=i;9TriB_Ba}F`)>{RVEDtz9 zKg2zh#!tsX_4+Wyo9XHOc|r;*=0|7!3!jzB9?na-ZSi5!q^9CIB4 zZjn8#kHWtS^jiJ`l~>`vo8)KO^82{uYyFM4Fig+rr0IxUm8uq%6rG0`etanfzRM-pDH~RZ&Dx;9+84(Bn zQ|%$n1z!q*wDvwXVgy&Q{dm1uwiAMoNH(ko>BE%tCM4{7LvRZb(jG#%d1M)ASghR# zpwqgE+Q04i)wYT&>C{kh<=c%|r;{H!I<13^sT~fhZ}j&9vOC`So6jb6ZM<6#LLw*drq0{fIqJj!}4t|k9-^U?2USE=r7Os`OiLzt| zD~)H@@t&~eLkhG)bEmenUjhw`;T=5&D!?Fc7Q9D8#As6Pz@EF^$8`cAyJ7Ni@( 

So turns out that none of the `replace=True` things actually work
because of the map caches (except for
`register_attribute_builder(replace=True)`, which doesn't use such a
cache). This was hidden by a series of unfortunate events:

1. `register_type_caster` failure was hidden because it was the same
   `TestIntegerRankedTensorType` being replaced with itself (d'oh).
2. `register_operation` failure was hidden behind the "order of events"
   in the lifecycle of typical extension import/use. Since extensions
   are loaded/registered almost immediately after generated builders are
   registered, there is no opportunity for the `operationClassMapCache`
   to be populated (through e.g., `module.body.operations[2]` or
   `module.body.operations[2].opview` or something). Of course as soon
   as you actually do "late-bind/late-register" the extension, you see
   it's not successfully replacing the stale one in
   `operationClassMapCache`.

I'll take this opportunity to propose we ditch the caches altogether.
I've been cargo-culting them, but I really don't understand how they
work. There's this comment above `operationClassMapCache`:

```cpp
/// Cache of operation name to external operation class object. This is
/// maintained on lookup as a shadow of operationClassMap in order for repeat
/// lookups of the classes to only incur the cost of one hashtable lookup.
llvm::StringMap<pybind11::object> operationClassMapCache;
```

But I don't understand how that's true given that the canonical thing
`operationClassMap` is already a map:

```cpp
/// Map of full operation name to external operation class object.
llvm::StringMap<pybind11::object> operationClassMap;
```

Maybe it wasn't always the case? Anyway, things work now, but it seems
like an unnecessary layer of complexity for not much gain? But maybe
I'm wrong.
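
To make the late-bind failure concrete, here is a minimal sketch (the
`custom` dialect and the `CustomF32` OpView are made-up stand-ins for
illustration; `register_dialect`, `register_operation(..., replace=True)`,
and the `operations[i]` lookup are the real hooks, with signatures
assumed from the generated-builder pattern):

```python
from mlir.ir import Context, Module
from mlir.dialects._ods_common import _cext

ctx = Context()
ctx.allow_unregistered_dialects = True
module = Module.parse('%0 = "custom.f32"() : () -> f32', ctx)

# The first lookup populates operationClassMapCache for "custom.f32"
# with the generic OpView fallback.
print(repr(module.body.operations[0]))


@_cext.register_dialect
class _Dialect(_cext.ir.Dialect):
    DIALECT_NAMESPACE = "custom"


@_cext.register_operation(_Dialect, replace=True)
class CustomF32(_cext.ir.OpView):
    OPERATION_NAME = "custom.f32"


# operationClassMap now holds CustomF32, but before this change the
# stale entry in operationClassMapCache won and this lookup still
# produced the generic OpView.
print(repr(module.body.operations[0]))
```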
---
 mlir/lib/Bindings/Python/IRModule.cpp    |  8 +++++++
 mlir/test/python/dialects/python_test.py | 13 +++++++++++
 mlir/test/python/ir/operation.py         | 28 ++++++++++++++++++++++++
 3 files changed, 49 insertions(+)

diff --git a/mlir/lib/Bindings/Python/IRModule.cpp b/mlir/lib/Bindings/Python/IRModule.cpp
index a1c8ab7a09ce15..f8e22f7bb0c1ba 100644
--- a/mlir/lib/Bindings/Python/IRModule.cpp
+++ b/mlir/lib/Bindings/Python/IRModule.cpp
@@ -82,6 +82,10 @@ void PyGlobals::registerTypeCaster(MlirTypeID mlirTypeID,
   if (found && !found.is_none() && !replace)
     throw std::runtime_error("Type caster is already registered");
   found = std::move(typeCaster);
+  const auto foundIt = typeCasterMapCache.find(mlirTypeID);
+  if (foundIt != typeCasterMapCache.end() && !foundIt->second.is_none()) {
+    typeCasterMapCache[mlirTypeID] = found;
+  }
 }
 
 void PyGlobals::registerDialectImpl(const std::string &dialectNamespace,
@@ -104,6 +108,10 @@ void PyGlobals::registerOperationImpl(const std::string &operationName,
             .str());
   }
   found = std::move(pyClass);
+  auto foundIt = operationClassMapCache.find(operationName);
+  if (foundIt != operationClassMapCache.end() && !foundIt->second.is_none()) {
+    operationClassMapCache[operationName] = found;
+  }
 }
 
 std::optional<py::object>
diff --git a/mlir/test/python/dialects/python_test.py b/mlir/test/python/dialects/python_test.py
index 651e6554eebe8b..3d4cd087fbfed8 100644
--- a/mlir/test/python/dialects/python_test.py
+++ b/mlir/test/python/dialects/python_test.py
@@ -510,6 +510,19 @@ def type_caster(pytype):
     except RuntimeError as e:
         print(e)
 
+    def type_caster(pytype):
+        return RankedTensorType(pytype)
+
+    # python_test dialect registers a caster for RankedTensorType in its extension (pybind) module.
+    # So this one replaces that one (successfully). And then just to be sure we restore the original caster below.
+    register_type_caster(c.typeid, type_caster, replace=True)
+
+    d = tensor.EmptyOp([10, 10], IntegerType.get_signless(5)).result
+    # CHECK: tensor<10x10xi5>
+    print(d.type)
+    # CHECK: ranked tensor type RankedTensorType(tensor<10x10xi5>)
+    print("ranked tensor type", repr(d.type))
+
     def type_caster(pytype):
         return test.TestIntegerRankedTensorType(pytype)
 
diff --git a/mlir/test/python/ir/operation.py b/mlir/test/python/ir/operation.py
index 129b7fa744e472..04239b048c1c64 100644
--- a/mlir/test/python/ir/operation.py
+++ b/mlir/test/python/ir/operation.py
@@ -5,6 +5,8 @@
 import itertools
 from mlir.ir import *
 from mlir.dialects.builtin import ModuleOp
+from mlir.dialects import arith
+from mlir.dialects._ods_common import _cext
 
 
 def run(f):
@@ -646,6 +648,7 @@ def testKnownOpView():
         %1 = "custom.f32"() : () -> f32
         %2 = "custom.f32"() : () -> f32
         %3 = arith.addf %1, %2 : f32
+        %4 = arith.constant 0 : i32
     """
     )
     print(module)
@@ -668,6 +671,31 @@ def testKnownOpView():
     # CHECK: OpView object
     print(repr(custom))
 
+    # constant should map to an extension OpView class in the arithmetic dialect.
+    constant = module.body.operations[3]
+    # CHECK: .ConstantOp object
+    print(repr(constant))
+
 
 # CHECK-LABEL: TEST: testSingleResultProperty
 @run

From e6971e5a41fe30264af9a13c8f387c06f93c6d9c Mon Sep 17 00:00:00 2001
From: flyingcat <1004815462@qq.com>
Date: Tue, 31 Oct 2023 10:01:07 +0800
Subject: [PATCH 109/144] [RISCV][NFC] Simplify vector register decoding
 methods (#70423)

Combine redundant 'if' statements and simplify 'switch' statements.
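
Distilled, the decodeVMaskReg rewrite trades a three-arm switch for a
range guard plus a conditional initializer. A self-contained sketch of
the new shape (MC types stubbed out here so it compiles standalone; the
real code returns MCDisassembler::Fail/Success and adds the operand to
the MCInst):

```cpp
#include <cstdint>

using MCRegister = unsigned;
constexpr MCRegister NoRegister = 0;
constexpr MCRegister V0 = 1; // stand-in encodings, not the real registers

// The old form switched over RegNo with default/case 0/case 1 arms; the
// new form keeps the same valid encodings: 0 -> V0, 1 -> NoRegister.
bool decodeVMaskReg(uint64_t RegNo, MCRegister &Reg) {
  if (RegNo > 2)
    return false; // MCDisassembler::Fail
  Reg = (RegNo == 0) ? V0 : NoRegister;
  return true; // MCDisassembler::Success
}
```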
---
 .../RISCV/Disassembler/RISCVDisassembler.cpp  | 26 +++++--------------
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index e5ce029449a8c6..9bd7cacbf5f04b 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -196,10 +196,7 @@ static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint32_t RegNo,
 static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint32_t RegNo,
                                             uint64_t Address,
                                             const MCDisassembler *Decoder) {
-  if (RegNo >= 32)
-    return MCDisassembler::Fail;
-
-  if (RegNo % 2)
+  if (RegNo >= 32 || RegNo % 2)
     return MCDisassembler::Fail;
 
   const RISCVDisassembler *Dis =
@@ -216,10 +213,7 @@ static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint32_t RegNo,
 static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint32_t RegNo,
                                             uint64_t Address,
                                             const MCDisassembler *Decoder) {
-  if (RegNo >= 32)
-    return MCDisassembler::Fail;
-
-  if (RegNo % 4)
+  if (RegNo >= 32 || RegNo % 4)
     return MCDisassembler::Fail;
 
   const RISCVDisassembler *Dis =
@@ -236,10 +230,7 @@ static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint32_t RegNo,
 static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint32_t RegNo,
                                             uint64_t Address,
                                             const MCDisassembler *Decoder) {
-  if (RegNo >= 32)
-    return MCDisassembler::Fail;
-
-  if (RegNo % 8)
+  if (RegNo >= 32 || RegNo % 8)
     return MCDisassembler::Fail;
 
   const RISCVDisassembler *Dis =
@@ -256,16 +247,11 @@ static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint32_t RegNo,
 static DecodeStatus decodeVMaskReg(MCInst &Inst, uint64_t RegNo,
                                    uint64_t Address,
                                    const MCDisassembler *Decoder) {
-  MCRegister Reg = RISCV::NoRegister;
-  switch (RegNo) {
-  default:
+  if (RegNo > 2) {
     return MCDisassembler::Fail;
-  case 0:
-    Reg = RISCV::V0;
-    break;
-  case 1:
-    break;
   }
+  MCRegister Reg = (RegNo == 0) ? RISCV::V0 : RISCV::NoRegister;
+
   Inst.addOperand(MCOperand::createReg(Reg));
   return MCDisassembler::Success;
 }

From a5403a3a69d10d79c1abd361f02460380e08b2c0 Mon Sep 17 00:00:00 2001
From: Petr Hosek
Date: Mon, 30 Oct 2023 20:45:03 -0700
Subject: [PATCH 110/144] [libcxx] Amend XFAIL for failing tests on Windows
 (#70422)

Some tests started passing/failing after #69431 because Clang no longer
enables -fdelayed-template-parsing by default on Windows with C++20.
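
For context on the flag: under -fdelayed-template-parsing (the old
MSVC-mode default), the bodies of uninstantiated templates are only
tokenized, not parsed, so parse-time diagnostics inside them never fire.
A minimal illustration (undeclared_function is a made-up name):

```cpp
// With -fdelayed-template-parsing this translation unit compiles: f's
// body is never parsed and f is never instantiated. Without the flag,
// Clang diagnoses the non-dependent call to an undeclared function at
// definition time.
template <class T>
void f() {
  undeclared_function(42); // deliberately never declared
}

int main() {}
```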
---
 .../atomics/diagnose_invalid_memory_order.verify.cpp | 7 ++++---
 libcxx/test/libcxx/fuzzing/random.pass.cpp           | 4 ++++
 libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp  | 4 ++++
 libcxx/test/std/numerics/c.math/cmath.pass.cpp       | 4 ++++
 4 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp b/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp
index a6c1160c7022f0..defd43cf267a9f 100644
--- a/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp
+++ b/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp
@@ -6,9 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-// This test fails because diagnose_if doesn't emit all of the diagnostics
-// when -fdelayed-template-parsing is enabled, like it is in MSVC mode.
-// XFAIL: msvc
+// This test fails with Clang <18 because diagnose_if doesn't emit all of the
+// diagnostics when -fdelayed-template-parsing is enabled, like it is in MSVC
+// mode.
+// XFAIL: msvc && (clang-16 || clang-17)
 
 // REQUIRES: diagnose-if-support
 
diff --git a/libcxx/test/libcxx/fuzzing/random.pass.cpp b/libcxx/test/libcxx/fuzzing/random.pass.cpp
index 79ab7ac41151c9..69b496fa3a4e66 100644
--- a/libcxx/test/libcxx/fuzzing/random.pass.cpp
+++ b/libcxx/test/libcxx/fuzzing/random.pass.cpp
@@ -6,6 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+// This test fails because Clang no longer enables -fdelayed-template-parsing
+// by default on Windows with C++20 (#69431).
+// XFAIL: msvc && clang-18
+
 // UNSUPPORTED: c++03, c++11
 
 #include
diff --git a/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp
index 87767a2ee4311c..7a6c71ae68f797 100644
--- a/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp
@@ -6,6 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+// This test fails because Clang no longer enables -fdelayed-template-parsing
+// by default on Windows with C++20 (#69431).
+// XFAIL: msvc && clang-18
+
 // <math.h>
 
 #include <math.h>
diff --git a/libcxx/test/std/numerics/c.math/cmath.pass.cpp b/libcxx/test/std/numerics/c.math/cmath.pass.cpp
index 11a3de748cb7a6..a6a9dad639cdff 100644
--- a/libcxx/test/std/numerics/c.math/cmath.pass.cpp
+++ b/libcxx/test/std/numerics/c.math/cmath.pass.cpp
@@ -6,6 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+// This test fails because Clang no longer enables -fdelayed-template-parsing
+// by default on Windows with C++20 (#69431).
+// XFAIL: msvc && clang-18
+
 // <cmath>
 
 #include <cmath>

From 3e5187ea83067c3ec0d0e75085858acc4d1dc619 Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Mon, 30 Oct 2023 21:09:21 -0700
Subject: [PATCH 111/144] Revert "[HWASAN] Enable memcpy, memmove and memset
 interceptors (#70387)"

Breaks build bots, details in #70387.

This reverts commit 91cdd7d615da38a1f025646f526c2fce265a37e2.
---
 .../lib/hwasan/hwasan_interceptors.cpp        | 27 +++++++++++++++-
 .../lib/hwasan/hwasan_platform_interceptors.h | 12 +++----
 compiler-rt/test/hwasan/TestCases/bcmp.cpp    | 15 +++------
 compiler-rt/test/hwasan/TestCases/memcmp.cpp  | 15 +++------
 compiler-rt/test/hwasan/TestCases/memcpy.cpp  | 32 -------------------
 compiler-rt/test/hwasan/TestCases/memmove.cpp | 32 -------------------
 compiler-rt/test/hwasan/TestCases/memset.cpp  | 32 -------------------
 7 files changed, 40 insertions(+), 125 deletions(-)
 delete mode 100644 compiler-rt/test/hwasan/TestCases/memcpy.cpp
 delete mode 100644 compiler-rt/test/hwasan/TestCases/memmove.cpp
 delete mode 100644 compiler-rt/test/hwasan/TestCases/memset.cpp

diff --git a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
index 5171f035f97f76..0889831373a803 100644
--- a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
@@ -90,7 +90,8 @@ struct HWAsanInterceptorContext {
 #  include "sanitizer_common/sanitizer_syscalls_netbsd.inc"
 
 #  define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
-    HWASAN_WRITE_RANGE(ctx, ptr, size)
+    do {                                                 \
+    } while (false)
 
 #  define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) \
     HWASAN_READ_RANGE(ctx, ptr, size)
@@ -146,6 +147,30 @@ struct HWAsanInterceptorContext {
       (void)(name);                        \
     } while (false)
 
+#  define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size) \
+    do {                                                       \
+      (void)(ctx);                                             \
+      (void)(to);                                              \
+      (void)(from);                                            \
+      (void)(size);                                            \
+    } while (false)
+
+#  define COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size) \
+    do {                                                      \
+      (void)(ctx);                                            \
+      (void)(to);                                             \
+      (void)(from);                                           \
+      (void)(size);                                           \
+    } while (false)
+
+#  define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size) \
+    do {                                                      \
+      (void)(ctx);                                            \
+      (void)(block);                                          \
+      (void)(c);                                              \
+      (void)(size);                                           \
+    } while (false)
+
 #  define COMMON_INTERCEPTOR_STRERROR() \
     do {                                \
     } while (false)
diff --git a/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h b/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h
index d92b5105219427..86d26b5ac12d4a 100644
--- a/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h
+++ b/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h
@@ -56,14 +56,14 @@
 #undef SANITIZER_INTERCEPT_STRCASECMP
 #define SANITIZER_INTERCEPT_STRCASECMP 0
 
-// #undef SANITIZER_INTERCEPT_MEMSET
-// #define SANITIZER_INTERCEPT_MEMSET 0
+#undef SANITIZER_INTERCEPT_MEMSET
+#define SANITIZER_INTERCEPT_MEMSET 0
 
-// #undef SANITIZER_INTERCEPT_MEMMOVE
-// #define SANITIZER_INTERCEPT_MEMMOVE 0
+#undef SANITIZER_INTERCEPT_MEMMOVE
+#define SANITIZER_INTERCEPT_MEMMOVE 0
 
-// #undef SANITIZER_INTERCEPT_MEMCPY
-// #define SANITIZER_INTERCEPT_MEMCPY 0
+#undef SANITIZER_INTERCEPT_MEMCPY
+#define SANITIZER_INTERCEPT_MEMCPY 0
 
 // #undef SANITIZER_INTERCEPT_MEMCMP
 // #define SANITIZER_INTERCEPT_MEMCMP 0
diff --git a/compiler-rt/test/hwasan/TestCases/bcmp.cpp b/compiler-rt/test/hwasan/TestCases/bcmp.cpp
index 9b21bba56b1bee..a83147b0f32052 100644
--- a/compiler-rt/test/hwasan/TestCases/bcmp.cpp
+++ b/compiler-rt/test/hwasan/TestCases/bcmp.cpp
@@ -4,17 +4,11 @@
 // RUN: %clangxx_hwasan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s
 // REQUIRES: !android
 
-#include <assert.h>
 #include <sanitizer/hwasan_interface.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 
-__attribute__((no_sanitize("hwaddress"))) void
-ForceCallInterceptor(void *p, const void *a, size_t size) {
-  assert(bcmp(p, a, size) == 0);
-}
-
 int main(int argc, char **argv) {
   __hwasan_enable_allocator_tagging();
   char a[] = {static_cast<char>(argc), 2, 3, 4};
2, 3, 4}; @@ -22,14 +16,13 @@ int main(int argc, char **argv) { char *p = (char *)malloc(size); memcpy(p, a, size); free(p); - ForceCallInterceptor(p, a, size); - return 0; + return bcmp(p, a, size); // CHECK: HWAddressSanitizer: tag-mismatch on address // CHECK: READ of size 4 - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-4]] + // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-3]] // CHECK: Cause: use-after-free // CHECK: freed by thread - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-8]] + // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-7]] // CHECK: previously allocated by thread - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-12]] + // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-11]] } diff --git a/compiler-rt/test/hwasan/TestCases/memcmp.cpp b/compiler-rt/test/hwasan/TestCases/memcmp.cpp index 31915527c27fdd..5f8a93f62a44a1 100644 --- a/compiler-rt/test/hwasan/TestCases/memcmp.cpp +++ b/compiler-rt/test/hwasan/TestCases/memcmp.cpp @@ -3,17 +3,11 @@ // RUN: %clangxx_hwasan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s // RUN: %clangxx_hwasan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s -#include #include #include #include #include -__attribute__((no_sanitize("hwaddress"))) void -ForceCallInterceptor(void *p, const void *a, size_t size) { - assert(memcmp(p, a, size) == 0); -} - int main(int argc, char **argv) { __hwasan_enable_allocator_tagging(); char a[] = {static_cast(argc), 2, 3, 4}; @@ -21,14 +15,13 @@ int main(int argc, char **argv) { char *p = (char *)malloc(size); memcpy(p, a, size); free(p); - ForceCallInterceptor(p, a, size); - return 0; + return memcmp(p, a, size); // CHECK: HWAddressSanitizer: tag-mismatch on address // CHECK: READ of size 4 - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcmp.cpp:[[@LINE-4]] + // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcmp.cpp:[[@LINE-3]] // CHECK: Cause: use-after-free // CHECK: freed by thread - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcmp.cpp:[[@LINE-8]] + // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcmp.cpp:[[@LINE-7]] // CHECK: previously allocated by thread - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcmp.cpp:[[@LINE-12]] + // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcmp.cpp:[[@LINE-11]] } diff --git a/compiler-rt/test/hwasan/TestCases/memcpy.cpp b/compiler-rt/test/hwasan/TestCases/memcpy.cpp deleted file mode 100644 index 830449488fec49..00000000000000 --- a/compiler-rt/test/hwasan/TestCases/memcpy.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// RUN: %clangxx_hwasan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s -// RUN: %clangxx_hwasan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s -// RUN: %clangxx_hwasan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s -// RUN: %clangxx_hwasan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s - -#include -#include -#include -#include - -__attribute__((no_sanitize("hwaddress"))) void -ForceCallInterceptor(void *p, const void *a, size_t size) { - memcpy(p, a, size); -} - -int main(int argc, char **argv) { - __hwasan_enable_allocator_tagging(); - char a[] = {static_cast(argc), 2, 3, 4}; - int size = sizeof(a); - char *volatile p = (char *)malloc(size); - free(p); - ForceCallInterceptor(p, a, size); - return 0; - // CHECK: HWAddressSanitizer: tag-mismatch on address - // CHECK: WRITE of size 
4 - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcpy.cpp:[[@LINE-4]] - // CHECK: Cause: use-after-free - // CHECK: freed by thread - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcpy.cpp:[[@LINE-8]] - // CHECK: previously allocated by thread - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcpy.cpp:[[@LINE-11]] -} diff --git a/compiler-rt/test/hwasan/TestCases/memmove.cpp b/compiler-rt/test/hwasan/TestCases/memmove.cpp deleted file mode 100644 index 40dc3deeb39350..00000000000000 --- a/compiler-rt/test/hwasan/TestCases/memmove.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// RUN: %clangxx_hwasan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s -// RUN: %clangxx_hwasan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s -// RUN: %clangxx_hwasan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s -// RUN: %clangxx_hwasan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s - -#include -#include -#include -#include - -__attribute__((no_sanitize("hwaddress"))) void -ForceCallInterceptor(void *p, const void *a, size_t size) { - memmove(p, a, size); -} - -int main(int argc, char **argv) { - __hwasan_enable_allocator_tagging(); - char a[] = {static_cast(argc), 2, 3, 4}; - int size = sizeof(a); - char *volatile p = (char *)malloc(size); - free(p); - ForceCallInterceptor(p, a, size); - return 0; - // CHECK: HWAddressSanitizer: tag-mismatch on address - // CHECK: WRITE of size 4 - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memmove.cpp:[[@LINE-4]] - // CHECK: Cause: use-after-free - // CHECK: freed by thread - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memmove.cpp:[[@LINE-8]] - // CHECK: previously allocated by thread - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memmove.cpp:[[@LINE-11]] -} diff --git a/compiler-rt/test/hwasan/TestCases/memset.cpp b/compiler-rt/test/hwasan/TestCases/memset.cpp deleted file mode 100644 index ae31a3bfe9cdaa..00000000000000 --- a/compiler-rt/test/hwasan/TestCases/memset.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// RUN: %clangxx_hwasan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s -// RUN: %clangxx_hwasan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s -// RUN: %clangxx_hwasan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s -// RUN: %clangxx_hwasan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s - -#include -#include -#include -#include - -__attribute__((no_sanitize("hwaddress"))) void -ForceCallInterceptor(void *p, int c, size_t size) { - memset(p, c, size) == nullptr; -} - -int main(int argc, char **argv) { - __hwasan_enable_allocator_tagging(); - char a[] = {static_cast(argc), 2, 3, 4}; - int size = sizeof(a); - char *volatile p = (char *)malloc(size); - free(p); - ForceCallInterceptor(p, 0, size); - return 0; - // CHECK: HWAddressSanitizer: tag-mismatch on address - // CHECK: WRITE of size 4 - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memset.cpp:[[@LINE-4]] - // CHECK: Cause: use-after-free - // CHECK: freed by thread - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memset.cpp:[[@LINE-8]] - // CHECK: previously allocated by thread - // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memset.cpp:[[@LINE-11]] -} From 18f036d0105589c3175bb51a518c5d272dae61e2 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Tue, 31 Oct 2023 11:32:27 +0700 Subject: [PATCH 112/144] [test] Align behavior of interrupts.test on different platforms (#68556) The test llvm/Support/interrupts.test behaves differently on Linux and Windows. 
On Linux, the function 'run_wrapper' runs the process with stderr
connected to a pipe, while on Windows stderr is mapped to the stderr of
the running script. As long as no output was made to stderr, this
difference was not observable. The new version of llvm-symbolizer
(https://reviews.llvm.org/D149759) complains about a missing binary
file, so stderr is no longer empty and the test fails on Windows but
passes on Linux.
---
 llvm/test/Support/interrupts.test | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/llvm/test/Support/interrupts.test b/llvm/test/Support/interrupts.test
index 86730f5139c042..752426c5292b09 100644
--- a/llvm/test/Support/interrupts.test
+++ b/llvm/test/Support/interrupts.test
@@ -10,7 +10,8 @@ import sys
 import time
 
 def run_symbolizer():
-    proc = subprocess.Popen([sys.argv[2]], stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+    proc = subprocess.Popen([sys.argv[2]], stdout=subprocess.PIPE,
+                            stdin=subprocess.PIPE, stderr=sys.stderr)
     # Write then read some output to ensure the process has started fully.
     proc.stdin.write(b'foo bar\n')
     proc.stdin.flush()
@@ -29,13 +30,10 @@ def run_wrapper():
     if os.name == 'nt':
         startupinfo = subprocess.STARTUPINFO()
         startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
-        proc = subprocess.Popen(args,
-                                stderr=sys.stderr,
-                                startupinfo=startupinfo,
-                                creationflags=subprocess.CREATE_NEW_CONSOLE)
+        subprocess.run(args, stderr=sys.stderr, startupinfo=startupinfo,
+                       creationflags=subprocess.CREATE_NEW_CONSOLE)
     else:
-        proc = subprocess.Popen(args,
-                                stderr=subprocess.PIPE)
+        subprocess.run(args, stderr=sys.stderr)
 
 if sys.argv[1] == 'wrapper':
     run_wrapper()

From ae7f7f2ef2033a48fb9db3cb70b88ad62019f40b Mon Sep 17 00:00:00 2001
From: Mikhail Gudim
Date: Tue, 31 Oct 2023 00:37:30 -0400
Subject: [PATCH 113/144] [RISCV] Add `TuneVentanaVeyron` subtarget feature. (#70414)

This will be used to add veyron fusions in a later commit.
---
 llvm/lib/Target/RISCV/RISCVFeatures.td   | 4 ++++
 llvm/lib/Target/RISCV/RISCVProcessors.td | 3 ++-
 llvm/lib/Target/RISCV/RISCVSubtarget.h   | 4 +++-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 979bc0ea8c7d06..ba6c63d8958e64 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -954,6 +954,10 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
                                    [TuneNoDefaultUnroll,
                                     TuneShortForwardBranchOpt]>;
 
+def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
+                                         "Ventana-Veyron Series processors",
+                                         [TuneLUIADDIFusion]>;
+
 // Assume that lock-free native-width atomics are available, even if the target
 // and operating system combination would not usually provide them. The user
 // is responsible for providing any necessary __sync implementations. Code
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index e4008d145ffa57..5465e0c998ca6f 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -242,4 +242,5 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
                                              FeatureStdExtZicbom,
                                              FeatureStdExtZicbop,
                                              FeatureStdExtZicboz,
-                                             FeatureVendorXVentanaCondOps]>;
+                                             FeatureVendorXVentanaCondOps],
+                                            [TuneVentanaVeyron]>;
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 6b915e61c13608..f8fab760140681 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -46,11 +46,13 @@ struct RISCVTuneInfo {
 
 class RISCVSubtarget : public RISCVGenSubtargetInfo {
 public:
+  // clang-format off
   enum RISCVProcFamilyEnum : uint8_t {
     Others,
     SiFive7,
+    VentanaVeyron,
   };
-
+  // clang-format on
 private:
   virtual void anchor();

From 6258da14d6c8048b24254f06c346d2d3d57e647a Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Mon, 30 Oct 2023 22:22:45 -0700
Subject: [PATCH 114/144] [OpenMP] Lower synchronization threshold for reductions

This should provide an easy performance boost by only avoiding
synchronization that was unnecessary anyway.
---
 openmp/libomptarget/DeviceRTL/src/Reduction.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
index 8ec8d296196337..a041d239e1abb4 100644
--- a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
@@ -208,7 +208,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
     // to the number of slots in the buffer.
     bool IsMaster = (ThreadId == 0);
     while (IsMaster) {
-      Bound = atomic::load(&IterCnt, atomic::seq_cst);
+      Bound = atomic::load(&IterCnt, atomic::aquire);
       if (TeamId < Bound + num_of_records)
         break;
     }
@@ -220,8 +220,6 @@
     } else
       lgredFct(GlobalBuffer, ModBockId, reduce_data);
 
-    fence::system(atomic::seq_cst);
-
     // Increment team counter.
     // This counter is incremented by all teams in the current
     // BUFFER_SIZE chunk.
@@ -230,7 +228,9 @@
   }
   // Synchronize
   if (mapping::isSPMDMode())
-    __kmpc_barrier(Loc, TId);
+    synchronize::threadsAligned(atomic::acq_rel);
+  else
+    fence::kernel(atomic::acq_rel);
 
   // reduce_data is global or shared so before being reduced within the
   // warp we need to bring it in local memory:

From a396fb247e0719f56a830a9e4aab0449be7f843a Mon Sep 17 00:00:00 2001
From: Piotr Zegar
Date: Tue, 31 Oct 2023 06:52:40 +0100
Subject: [PATCH 115/144] [clang-tidy] Fix crash in modernize-use-trailing-return-type (#70709)

Resolved the crash that occurred during the use of a user-defined
C-style string literal. The fix entails checking whether the identifier
is non-empty before attempting to read its name.
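For illustration, the guard pattern reduces to the following sketch
(hypothetical surrounding code; the real check lives in
UnqualNameVisitor::VisitDeclRefExpr in the diff below). An empty
DeclarationName still reports isIdentifier() == true but carries a null
IdentifierInfo, so it must be filtered out before the name is read:

    // Sketch only: 'S' is assumed to be a DeclRefExpr*.
    DeclarationName Name = S->getNameInfo().getName();
    if (Name.isEmpty() || !Name.isIdentifier())
      return true; // no plain identifier to inspect
    // Safe now: a non-empty identifier has a valid IdentifierInfo.
    llvm::StringRef Text = Name.getAsIdentifierInfo()->getName();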
---
 .../modernize/UseTrailingReturnTypeCheck.cpp  |  2 +-
 .../use-trailing-return-type-cxx20.cpp        | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp
index b81cfbcbfd16cc..5a456c58fb5cc5 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp
@@ -103,7 +103,7 @@ struct UnqualNameVisitor : public RecursiveASTVisitor<UnqualNameVisitor> {
 
   bool VisitDeclRefExpr(DeclRefExpr *S) {
     DeclarationName Name = S->getNameInfo().getName();
-    return S->getQualifierLoc() || !Name.isIdentifier() ||
+    return S->getQualifierLoc() || Name.isEmpty() || !Name.isIdentifier() ||
            !visitUnqualName(Name.getAsIdentifierInfo()->getName());
   }
 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-trailing-return-type-cxx20.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-trailing-return-type-cxx20.cpp
index 63fe7a95fdc94a..72fdcc01779650 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-trailing-return-type-cxx20.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-trailing-return-type-cxx20.cpp
@@ -98,3 +98,21 @@ struct TestDefaultOperatorB {
   // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
   // CHECK-FIXES: {{^}}  friend auto operator<(const TestDefaultOperatorB &, const TestDefaultOperatorB &) noexcept -> bool = default;{{$}}
 };
+
+namespace PR69863 {
+
+template <unsigned Len>
+struct CustomCompileTimeString {
+  constexpr CustomCompileTimeString(const char (&)[Len]) noexcept {}
+};
+
+template <CustomCompileTimeString Str>
+constexpr decltype(Str) operator""__csz() noexcept {
+// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: use a trailing return type for this function [modernize-use-trailing-return-type]
+// CHECK-FIXES: {{^}}constexpr auto operator""__csz() noexcept -> decltype(Str) {
+  return Str;
+}
+
+inline constexpr CustomCompileTimeString SomeString = "This line will cause a crash"__csz;
+
+}

From dcae289d3a4f77b50efc8b8ecd2d5a58c86933ca Mon Sep 17 00:00:00 2001
From: Christian Ulmann
Date: Tue, 31 Oct 2023 07:34:49 +0100
Subject: [PATCH 116/144] [MLIR][SparseTensor] Introduce opaque pointers in LLVM dialect lowering (#70570)

This commit changes the SparseTensor LLVM dialect lowering from using
`llvm.ptr<i8>` to the opaque `llvm.ptr`. This change ensures that the
lowering now properly relies on opaque pointers, instead of working
with already type-erased i8 pointers.
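For reference, a minimal sketch of the two type constructions involved
(illustrative only; the exact call sites appear in the diff below):

    // Typed pointer (deprecated): prints as !llvm.ptr<i8>.
    Type typedPtr = LLVM::LLVMPointerType::get(IntegerType::get(ctx, 8));
    // Opaque pointer: prints as !llvm.ptr.
    Type opaquePtr = LLVM::LLVMPointerType::get(ctx);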
--- .../SparseTensor/Transforms/CodegenUtils.cpp | 2 +- .../Transforms/SparseTensorConversion.cpp | 2 +- mlir/test/Dialect/SparseTensor/codegen.mlir | 32 ++--- .../test/Dialect/SparseTensor/conversion.mlir | 122 +++++++++--------- .../Dialect/SparseTensor/sparse_expand.mlir | 2 +- .../SparseTensor/sparse_fill_zero.mlir | 34 ++--- .../Dialect/SparseTensor/sparse_lower.mlir | 16 +-- .../SparseTensor/sparse_lower_col.mlir | 16 +-- .../SparseTensor/sparse_lower_inplace.mlir | 16 +-- .../SparseTensor/sparse_perm_lower.mlir | 4 +- 10 files changed, 123 insertions(+), 123 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index b1b1d67ac2d420..74347625961999 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -341,7 +341,7 @@ func::CallOp mlir::sparse_tensor::createFuncCall( } Type mlir::sparse_tensor::getOpaquePointerType(MLIRContext *ctx) { - return LLVM::LLVMPointerType::get(IntegerType::get(ctx, 8)); + return LLVM::LLVMPointerType::get(ctx); } Type mlir::sparse_tensor::getOpaquePointerType(Builder &builder) { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index 570be951cab845..79f6013640440d 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -42,7 +42,7 @@ namespace { /// Maps each sparse tensor type to an opaque pointer. static std::optional convertSparseTensorTypes(Type type) { if (getSparseTensorEncoding(type) != nullptr) - return LLVM::LLVMPointerType::get(IntegerType::get(type.getContext(), 8)); + return LLVM::LLVMPointerType::get(type.getContext()); return std::nullopt; } diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index c53ec7408bc3b8..5876eef6f19327 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -664,7 +664,7 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> } // CHECK-LABEL: func.func @sparse_new_coo( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant false // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index @@ -674,9 +674,9 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<2xindex> to memref // CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<2xindex> // CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> -// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr -// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref -// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = 
call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index @@ -696,7 +696,7 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref // CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> // CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> -// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 // CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 // CHECK: scf.if %[[VAL_31]] { // CHECK: sparse_tensor.sort hybrid_quick_sort %[[VAL_10]], %[[VAL_16]] jointly %[[VAL_17]] @@ -704,15 +704,15 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK: memref.store %[[VAL_10]], %[[VAL_25]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_27]] crd_mem_sz at 0 with %[[VAL_13]] // CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_10]] -// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () +// CHECK: call @delSparseTensorReader(%[[VAL_8]]) : (!llvm.ptr) -> () // CHECK: return %[[VAL_25]], %[[VAL_16]], %[[VAL_17]], %[[VAL_33]] -func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { - %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor +func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { + %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index @@ -721,9 +721,9 @@ func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { // CHECK: %[[VAL_6:.*]] = memref.cast %[[VAL_5]] : memref<2xindex> to memref // CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> // CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> -// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr -// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref -// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr +// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_9:.*]] = call 
@getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index @@ -747,13 +747,13 @@ func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { // CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref // CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> // CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> -// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 +// CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 // CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] // CHECK: %[[VAL_33:.*]] = sparse_tensor.storage_specifier.set %[[VAL_32]] val_mem_sz with %[[VAL_9]] -// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () +// CHECK: call @delSparseTensorReader(%[[VAL_7]]) : (!llvm.ptr) -> () // CHECK: return %[[VAL_24]], %[[VAL_15]], %[[VAL_16]], %[[VAL_33]] -func.func @sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { - %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor +func.func @sparse_new_coo_permute_no(%arg0: !llvm.ptr) -> tensor { + %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 27d8f296c9ad0c..e4e825bf850439 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -29,14 +29,14 @@ }> // CHECK-LABEL: func @sparse_nop( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr -// CHECK: return %[[A]] : !llvm.ptr +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr +// CHECK: return %[[A]] : !llvm.ptr func.func @sparse_nop(%arg0: tensor) -> tensor { return %arg0 : tensor } // CHECK-LABEL: func @sparse_dim1d( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK: %[[C:.*]] = arith.constant 0 : index // CHECK: %[[D:.*]] = call @sparseLvlSize(%[[A]], %[[C]]) // CHECK: return %[[D]] : index @@ -50,7 +50,7 @@ func.func @sparse_dim1d(%arg0: tensor) -> index { // not be permuted into a query for the size of level 2 (even though // dimension 1 is stored as level 2). // CHECK-LABEL: func @sparse_dim3d( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK: %[[C:.*]] = arith.constant 2 : index // CHECK: %[[D:.*]] = call @sparseLvlSize(%[[A]], %[[C]]) // CHECK: return %[[D]] : index @@ -64,7 +64,7 @@ func.func @sparse_dim3d(%arg0: tensor) -> index { // constant (and we should be sure to get the size of dimension 1, // not dimension 2 nor level 1). 
// CHECK-LABEL: func @sparse_dim3d_const( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK: %[[C:.*]] = arith.constant 20 : index // CHECK: return %[[C]] : index func.func @sparse_dim3d_const(%arg0: tensor<10x20x30xf64, #SparseTensor>) -> index { @@ -74,7 +74,7 @@ func.func @sparse_dim3d_const(%arg0: tensor<10x20x30xf64, #SparseTensor>) -> ind } // CHECK-LABEL: func @sparse_new1d( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr // CHECK-DAG: %[[DimShape0:.*]] = memref.alloca() : memref<1xindex> // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<1xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) @@ -84,14 +84,14 @@ func.func @sparse_dim3d_const(%arg0: tensor<10x20x30xf64, #SparseTensor>) -> ind // CHECK-DAG: %[[Iota:.*]] = memref.cast %[[Iota0]] : memref<1xindex> to memref // CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimShape]], %[[DimShape]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[Reader]]) // CHECK: call @delSparseTensorReader(%[[Reader]]) -// CHECK: return %[[T]] : !llvm.ptr -func.func @sparse_new1d(%arg0: !llvm.ptr) -> tensor<128xf64, #SparseVector> { - %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor<128xf64, #SparseVector> +// CHECK: return %[[T]] : !llvm.ptr +func.func @sparse_new1d(%arg0: !llvm.ptr) -> tensor<128xf64, #SparseVector> { + %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor<128xf64, #SparseVector> return %0 : tensor<128xf64, #SparseVector> } // CHECK-LABEL: func @sparse_new2d( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr // CHECK-DAG: %[[DimShape0:.*]] = memref.alloca() : memref<2xindex> // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<2xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) @@ -102,14 +102,14 @@ func.func @sparse_new1d(%arg0: !llvm.ptr) -> tensor<128xf64, #SparseVector> // CHECK-DAG: %[[Iota:.*]] = memref.cast %[[Iota0]] : memref<2xindex> to memref // CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizes]], %[[DimSizes]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[Reader]]) // CHECK: call @delSparseTensorReader(%[[Reader]]) -// CHECK: return %[[T]] : !llvm.ptr -func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { - %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor +// CHECK: return %[[T]] : !llvm.ptr +func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { + %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func @sparse_new3d( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr // CHECK-DAG: %[[DimShape0:.*]] = memref.alloca() : memref<3xindex> // CHECK-DAG: %[[DimShape:.*]] = memref.cast %[[DimShape0]] : memref<3xindex> to memref // CHECK: %[[Reader:.*]] = call @createCheckedSparseTensorReader(%[[A]], %[[DimShape]], %{{.*}}) @@ -124,15 +124,15 @@ func.func @sparse_new2d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[LvlSizes:.*]] = memref.cast %[[LvlSizes0]] : memref<3xindex> to memref // CHECK: %[[T:.*]] = call @newSparseTensor(%[[DimSizes]], %[[LvlSizes]], %[[LvlTypes]], %[[Dim2Lvl]], %[[Lvl2Dim]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[Reader]]) // CHECK: call @delSparseTensorReader(%[[Reader]]) -// CHECK: return %[[T]] : !llvm.ptr -func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { - 
%0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor +// CHECK: return %[[T]] : !llvm.ptr +func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { + %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor } // CHECK-LABEL: func @sparse_init( // CHECK-SAME: %[[I:.*]]: index, -// CHECK-SAME: %[[J:.*]]: index) -> !llvm.ptr +// CHECK-SAME: %[[J:.*]]: index) -> !llvm.ptr // CHECK-DAG: %[[Empty:.*]] = arith.constant 0 : i32 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index @@ -144,9 +144,9 @@ func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor to memref // CHECK-DAG: memref.store %[[I]], %[[Sizes0]][%[[C0]]] : memref<2xindex> // CHECK-DAG: memref.store %[[J]], %[[Sizes0]][%[[C1]]] : memref<2xindex> -// CHECK: %[[NP:.*]] = llvm.mlir.zero : !llvm.ptr +// CHECK: %[[NP:.*]] = llvm.mlir.zero : !llvm.ptr // CHECK: %[[T:.*]] = call @newSparseTensor(%[[Sizes]], %[[Sizes]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}, %[[Empty]], %[[NP]]) -// CHECK: return %[[T]] : !llvm.ptr +// CHECK: return %[[T]] : !llvm.ptr func.func @sparse_init(%arg0: index, %arg1: index) -> tensor { %0 = tensor.empty(%arg0, %arg1) : tensor %1 = sparse_tensor.load %0 : tensor @@ -154,8 +154,8 @@ func.func @sparse_init(%arg0: index, %arg1: index) -> tensor { } // CHECK-LABEL: func @sparse_release( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -// CHECK: call @delSparseTensor(%[[A]]) : (!llvm.ptr) -> () +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) +// CHECK: call @delSparseTensor(%[[A]]) : (!llvm.ptr) -> () // CHECK: return func.func @sparse_release(%arg0: tensor<128xf64, #SparseVector>) { bufferization.dealloc_tensor %arg0 : tensor<128xf64, #SparseVector> @@ -163,17 +163,17 @@ func.func @sparse_release(%arg0: tensor<128xf64, #SparseVector>) { } // CHECK-LABEL: func @sparse_nop_cast( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr -// CHECK: return %[[A]] : !llvm.ptr +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr +// CHECK: return %[[A]] : !llvm.ptr func.func @sparse_nop_cast(%arg0: tensor<64xf32, #SparseVector>) -> tensor { %0 = tensor.cast %arg0 : tensor<64xf32, #SparseVector> to tensor return %0 : tensor } // CHECK-LABEL: func @sparse_positions( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK: %[[C:.*]] = arith.constant 0 : index -// CHECK: %[[T:.*]] = call @sparsePositions0(%[[A]], %[[C]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[T:.*]] = call @sparsePositions0(%[[A]], %[[C]]) : (!llvm.ptr, index) -> memref // CHECK: return %[[T]] : memref func.func @sparse_positions(%arg0: tensor<128xf64, #SparseVector>) -> memref { %0 = sparse_tensor.positions %arg0 { level = 0 : index } : tensor<128xf64, #SparseVector> to memref @@ -181,9 +181,9 @@ func.func @sparse_positions(%arg0: tensor<128xf64, #SparseVector>) -> memref) +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK: %[[C:.*]] = arith.constant 0 : index -// CHECK: %[[T:.*]] = call @sparsePositions64(%[[A]], %[[C]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[T:.*]] = call @sparsePositions64(%[[A]], %[[C]]) : (!llvm.ptr, index) -> memref // CHECK: return %[[T]] : memref func.func @sparse_positions64(%arg0: tensor<128xf64, #SparseVector64>) -> memref { %0 = sparse_tensor.positions %arg0 { level = 0 : index } : tensor<128xf64, #SparseVector64> to memref @@ -191,9 +191,9 @@ func.func @sparse_positions64(%arg0: tensor<128xf64, #SparseVector64>) -> memref } // CHECK-LABEL: func @sparse_positions32( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) // 
CHECK: %[[C:.*]] = arith.constant 0 : index -// CHECK: %[[T:.*]] = call @sparsePositions32(%[[A]], %[[C]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[T:.*]] = call @sparsePositions32(%[[A]], %[[C]]) : (!llvm.ptr, index) -> memref // CHECK: return %[[T]] : memref func.func @sparse_positions32(%arg0: tensor<128xf64, #SparseVector32>) -> memref { %0 = sparse_tensor.positions %arg0 { level = 0 : index } : tensor<128xf64, #SparseVector32> to memref @@ -201,9 +201,9 @@ func.func @sparse_positions32(%arg0: tensor<128xf64, #SparseVector32>) -> memref } // CHECK-LABEL: func @sparse_indices( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK: %[[C:.*]] = arith.constant 0 : index -// CHECK: %[[T:.*]] = call @sparseCoordinates0(%[[A]], %[[C]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[T:.*]] = call @sparseCoordinates0(%[[A]], %[[C]]) : (!llvm.ptr, index) -> memref // CHECK: return %[[T]] : memref func.func @sparse_indices(%arg0: tensor<128xf64, #SparseVector>) -> memref { %0 = sparse_tensor.coordinates %arg0 { level = 0 : index } : tensor<128xf64, #SparseVector> to memref @@ -211,9 +211,9 @@ func.func @sparse_indices(%arg0: tensor<128xf64, #SparseVector>) -> memref) +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK: %[[C:.*]] = arith.constant 0 : index -// CHECK: %[[T:.*]] = call @sparseCoordinates64(%[[A]], %[[C]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[T:.*]] = call @sparseCoordinates64(%[[A]], %[[C]]) : (!llvm.ptr, index) -> memref // CHECK: return %[[T]] : memref func.func @sparse_indices64(%arg0: tensor<128xf64, #SparseVector64>) -> memref { %0 = sparse_tensor.coordinates %arg0 { level = 0 : index } : tensor<128xf64, #SparseVector64> to memref @@ -221,9 +221,9 @@ func.func @sparse_indices64(%arg0: tensor<128xf64, #SparseVector64>) -> memref) +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK: %[[C:.*]] = arith.constant 0 : index -// CHECK: %[[T:.*]] = call @sparseCoordinates32(%[[A]], %[[C]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[T:.*]] = call @sparseCoordinates32(%[[A]], %[[C]]) : (!llvm.ptr, index) -> memref // CHECK: return %[[T]] : memref func.func @sparse_indices32(%arg0: tensor<128xf64, #SparseVector32>) -> memref { %0 = sparse_tensor.coordinates %arg0 { level = 0 : index } : tensor<128xf64, #SparseVector32> to memref @@ -231,8 +231,8 @@ func.func @sparse_indices32(%arg0: tensor<128xf64, #SparseVector32>) -> memref) -// CHECK: %[[T:.*]] = call @sparseValuesF64(%[[A]]) : (!llvm.ptr) -> memref +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) +// CHECK: %[[T:.*]] = call @sparseValuesF64(%[[A]]) : (!llvm.ptr) -> memref // CHECK: return %[[T]] : memref func.func @sparse_valuesf64(%arg0: tensor<128xf64, #SparseVector>) -> memref { %0 = sparse_tensor.values %arg0 : tensor<128xf64, #SparseVector> to memref @@ -240,8 +240,8 @@ func.func @sparse_valuesf64(%arg0: tensor<128xf64, #SparseVector>) -> memref) -// CHECK: %[[T:.*]] = call @sparseValuesF32(%[[A]]) : (!llvm.ptr) -> memref +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) +// CHECK: %[[T:.*]] = call @sparseValuesF32(%[[A]]) : (!llvm.ptr) -> memref // CHECK: return %[[T]] : memref func.func @sparse_valuesf32(%arg0: tensor<128xf32, #SparseVector>) -> memref { %0 = sparse_tensor.values %arg0: tensor<128xf32, #SparseVector> to memref @@ -249,8 +249,8 @@ func.func @sparse_valuesf32(%arg0: tensor<128xf32, #SparseVector>) -> memref) -// CHECK: %[[T:.*]] = call @sparseValuesI32(%[[A]]) : (!llvm.ptr) -> memref +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) +// CHECK: %[[T:.*]] = call @sparseValuesI32(%[[A]]) : (!llvm.ptr) -> memref // 
CHECK: return %[[T]] : memref func.func @sparse_valuesi32(%arg0: tensor<128xi32, #SparseVector>) -> memref { %0 = sparse_tensor.values %arg0: tensor<128xi32, #SparseVector> to memref @@ -258,8 +258,8 @@ func.func @sparse_valuesi32(%arg0: tensor<128xi32, #SparseVector>) -> memref) -// CHECK: %[[T:.*]] = call @sparseValuesI16(%[[A]]) : (!llvm.ptr) -> memref +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) +// CHECK: %[[T:.*]] = call @sparseValuesI16(%[[A]]) : (!llvm.ptr) -> memref // CHECK: return %[[T]] : memref func.func @sparse_valuesi16(%arg0: tensor<128xi16, #SparseVector>) -> memref { %0 = sparse_tensor.values %arg0: tensor<128xi16, #SparseVector> to memref @@ -267,8 +267,8 @@ func.func @sparse_valuesi16(%arg0: tensor<128xi16, #SparseVector>) -> memref) -// CHECK: %[[T:.*]] = call @sparseValuesI8(%[[A]]) : (!llvm.ptr) -> memref +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) +// CHECK: %[[T:.*]] = call @sparseValuesI8(%[[A]]) : (!llvm.ptr) -> memref // CHECK: return %[[T]] : memref func.func @sparse_valuesi8(%arg0: tensor<128xi8, #SparseVector>) -> memref { %0 = sparse_tensor.values %arg0: tensor<128xi8, #SparseVector> to memref @@ -276,9 +276,9 @@ func.func @sparse_valuesi8(%arg0: tensor<128xi8, #SparseVector>) -> memref } // CHECK-LABEL: func @sparse_noe( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) // CHECK-DAG: %[[C:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[T:.*]] = call @sparseValuesF64(%[[A]]) : (!llvm.ptr) -> memref +// CHECK-DAG: %[[T:.*]] = call @sparseValuesF64(%[[A]]) : (!llvm.ptr) -> memref // CHECK: %[[NOE:.*]] = memref.dim %[[T]], %[[C]] : memref // CHECK: return %[[NOE]] : index func.func @sparse_noe(%arg0: tensor<128xf64, #SparseVector>) -> index { @@ -287,34 +287,34 @@ func.func @sparse_noe(%arg0: tensor<128xf64, #SparseVector>) -> index { } // CHECK-LABEL: func @sparse_reconstruct( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr -// CHECK: return %[[A]] : !llvm.ptr +// CHECK-SAME: %[[A:.*]]: !llvm.ptr +// CHECK: return %[[A]] : !llvm.ptr func.func @sparse_reconstruct(%arg0: tensor<128xf32, #SparseVector>) -> tensor<128xf32, #SparseVector> { %0 = sparse_tensor.load %arg0 : tensor<128xf32, #SparseVector> return %0 : tensor<128xf32, #SparseVector> } // CHECK-LABEL: func @sparse_reconstruct_ins( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr -// CHECK: call @endLexInsert(%[[A]]) : (!llvm.ptr) -> () -// CHECK: return %[[A]] : !llvm.ptr +// CHECK-SAME: %[[A:.*]]: !llvm.ptr +// CHECK: call @endLexInsert(%[[A]]) : (!llvm.ptr) -> () +// CHECK: return %[[A]] : !llvm.ptr func.func @sparse_reconstruct_ins(%arg0: tensor<128xf32, #SparseVector>) -> tensor<128xf32, #SparseVector> { %0 = sparse_tensor.load %arg0 hasInserts : tensor<128xf32, #SparseVector> return %0 : tensor<128xf32, #SparseVector> } // CHECK-LABEL: func @sparse_insert( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr, +// CHECK-SAME: %[[A:.*]]: !llvm.ptr, // CHECK-SAME: %[[B:.*]]: index, -// CHECK-SAME: %[[C:.*]]: f32) -> !llvm.ptr { +// CHECK-SAME: %[[C:.*]]: f32) -> !llvm.ptr { // CHECK-DAG: %[[M:.*]] = memref.alloca() : memref<1xindex> // CHECK-DAG: %[[V:.*]] = memref.alloca() : memref // CHECK-DAG: %[[MC:.*]] = memref.cast %[[M]] : memref<1xindex> to memref // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: memref.store %[[B]], %[[M]][%[[C0]]] : memref<1xindex> // CHECK-DAG: memref.store %[[C]], %[[V]][] : memref -// CHECK: call @lexInsertF32(%[[A]], %[[MC]], %[[V]]) : (!llvm.ptr, memref, memref) -> () -// CHECK: return %[[A]] : !llvm.ptr +// CHECK: call @lexInsertF32(%[[A]], %[[MC]], %[[V]]) : (!llvm.ptr, 
memref, memref) -> () +// CHECK: return %[[A]] : !llvm.ptr func.func @sparse_insert(%arg0: tensor<128xf32, #SparseVector>, %arg1: index, %arg2: f32) -> tensor<128xf32, #SparseVector> { @@ -372,12 +372,12 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { } // CHECK-LABEL: func @sparse_compression( -// CHECK-SAME: %[[A:.*0]]: !llvm.ptr, +// CHECK-SAME: %[[A:.*0]]: !llvm.ptr, // CHECK-SAME: %[[B:.*1]]: memref, // CHECK-SAME: %[[C:.*2]]: memref, // CHECK-SAME: %[[D:.*3]]: memref, // CHECK-SAME: %[[E:.*4]]: index, -// CHECK-SAME: %[[F:.*5]]: index) -> !llvm.ptr { +// CHECK-SAME: %[[F:.*5]]: index) -> !llvm.ptr { // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[X:.*]] = memref.alloca() : memref<2xindex> // CHECK-DAG: %[[Y:.*]] = memref.cast %[[X]] : memref<2xindex> to memref @@ -386,7 +386,7 @@ func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { // CHECK-DAG: memref.dealloc %[[B]] : memref // CHECK-DAG: memref.dealloc %[[C]] : memref // CHECK-DAG: memref.dealloc %[[D]] : memref -// CHECK: return %[[A]] : !llvm.ptr +// CHECK: return %[[A]] : !llvm.ptr func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, %values: memref, %filled: memref, @@ -401,7 +401,7 @@ func.func @sparse_compression(%tensor: tensor<8x8xf64, #CSR>, // CHECK-LABEL: func @sparse_and_dense_init( // CHECK: %[[S:.*]] = call @newSparseTensor // CHECK: %[[D:.*]] = tensor.empty -// CHECK: return %[[S]], %[[D]] : !llvm.ptr, tensor +// CHECK: return %[[S]], %[[D]] : !llvm.ptr, tensor func.func @sparse_and_dense_init(%arg0: index, %arg1: index) -> (tensor, tensor) { %0 = tensor.empty(%arg0, %arg1) : tensor diff --git a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir index 0f367f12483f63..4817771a6044ce 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir @@ -46,7 +46,7 @@ // CHECK-SPARSE: return %[[RET]] // // CHECK-CONVERT-LABEL: func @kernel( -// CHECK-CONVERT-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr +// CHECK-CONVERT-SAME: %[[A:.*]]: !llvm.ptr) -> !llvm.ptr // CHECK-CONVERT-DAG: %[[C1:.*]] = arith.constant 1 : index // CHECK-CONVERT-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-CONVERT: %[[N:.*]] = call @sparseLvlSize(%[[A]], %[[C1]]) diff --git a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir index 8ecbc1da965a15..988ab7f85be413 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir @@ -3,8 +3,8 @@ #DCSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }> // CHECK-LABEL: func.func @fill_zero_after_alloc( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr, -// CHECK-SAME: %[[VAL_1:.*]]: !llvm.ptr) -> !llvm.ptr { +// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr, +// CHECK-SAME: %[[VAL_1:.*]]: !llvm.ptr) -> !llvm.ptr { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : i32 // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i32 @@ -27,8 +27,8 @@ // CHECK: %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<2xindex> to memref // CHECK: memref.store %[[VAL_5]], %[[VAL_16]]{{\[}}%[[VAL_5]]] : memref<2xindex> // CHECK: memref.store %[[VAL_6]], %[[VAL_16]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: %[[VAL_18:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_19:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_15]], %[[VAL_13]], %[[VAL_17]], 
%[[VAL_17]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_4]], %[[VAL_18]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[VAL_18:.*]] = llvm.mlir.zero : !llvm.ptr +// CHECK: %[[VAL_19:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_15]], %[[VAL_13]], %[[VAL_17]], %[[VAL_17]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_4]], %[[VAL_18]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr // CHECK: %[[VAL_20:.*]] = memref.alloc() : memref<300xf64> // CHECK: %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<300xf64> to memref // CHECK: %[[VAL_22:.*]] = memref.alloc() : memref<300xi1> @@ -37,16 +37,16 @@ // CHECK: %[[VAL_25:.*]] = memref.cast %[[VAL_24]] : memref<300xindex> to memref // CHECK: linalg.fill ins(%[[VAL_2]] : f64) outs(%[[VAL_20]] : memref<300xf64>) // CHECK: linalg.fill ins(%[[VAL_7]] : i1) outs(%[[VAL_22]] : memref<300xi1>) -// CHECK: %[[VAL_26:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_27:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_28:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_29:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_30:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref -// CHECK: %[[VAL_31:.*]] = call @sparsePositions0(%[[VAL_1]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_32:.*]] = call @sparseCoordinates0(%[[VAL_1]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_33:.*]] = call @sparsePositions0(%[[VAL_1]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_34:.*]] = call @sparseCoordinates0(%[[VAL_1]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK: %[[VAL_35:.*]] = call @sparseValuesF64(%[[VAL_1]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_26:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_27:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_28:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_29:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_30:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref +// CHECK: %[[VAL_31:.*]] = call @sparsePositions0(%[[VAL_1]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_32:.*]] = call @sparseCoordinates0(%[[VAL_1]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_33:.*]] = call @sparsePositions0(%[[VAL_1]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_34:.*]] = call @sparseCoordinates0(%[[VAL_1]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref +// CHECK: %[[VAL_35:.*]] = call @sparseValuesF64(%[[VAL_1]]) : (!llvm.ptr) -> memref // CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_26]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_26]]{{\[}}%[[VAL_6]]] : memref // CHECK: scf.for %[[VAL_38:.*]] = %[[VAL_36]] to %[[VAL_37]] step %[[VAL_6]] { @@ -107,13 +107,13 @@ // CHECK: %[[VAL_83:.*]] = memref.alloca() : memref<2xindex> // CHECK: %[[VAL_84:.*]] = memref.cast %[[VAL_83]] : memref<2xindex> to memref // CHECK: memref.store %[[VAL_39]], %[[VAL_83]]{{\[}}%[[VAL_5]]] : memref<2xindex> -// CHECK: func.call @expInsertF64(%[[VAL_19]], %[[VAL_84]], %[[VAL_21]], 
%[[VAL_23]], %[[VAL_25]], %[[VAL_85:.*]]#2) : (!llvm.ptr, memref, memref, memref, memref, index) -> () +// CHECK: func.call @expInsertF64(%[[VAL_19]], %[[VAL_84]], %[[VAL_21]], %[[VAL_23]], %[[VAL_25]], %[[VAL_85:.*]]#2) : (!llvm.ptr, memref, memref, memref, memref, index) -> () // CHECK: } // CHECK: memref.dealloc %[[VAL_20]] : memref<300xf64> // CHECK: memref.dealloc %[[VAL_22]] : memref<300xi1> // CHECK: memref.dealloc %[[VAL_24]] : memref<300xindex> -// CHECK: call @endLexInsert(%[[VAL_19]]) : (!llvm.ptr) -> () -// CHECK: return %[[VAL_19]] : !llvm.ptr +// CHECK: call @endLexInsert(%[[VAL_19]]) : (!llvm.ptr) -> () +// CHECK: return %[[VAL_19]] : !llvm.ptr // CHECK: } func.func @fill_zero_after_alloc(%arg0: tensor<100x200xf64, #DCSR>, %arg1: tensor<200x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> { diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir index 3b8b86010edd28..13245f427a7970 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir @@ -52,15 +52,15 @@ // CHECK-HIR: } // CHECK-MIR-LABEL: func @matvec( -// CHECK-MIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, +// CHECK-MIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, // CHECK-MIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, // CHECK-MIR-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-MIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-MIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-MIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-MIR-DAG: %[[VAL_6:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK-MIR-DAG: %[[VAL_7:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK-MIR-DAG: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref +// CHECK-MIR-DAG: %[[VAL_6:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref +// CHECK-MIR-DAG: %[[VAL_7:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref +// CHECK-MIR-DAG: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref // CHECK-MIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64> // CHECK-MIR-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> // CHECK-MIR: scf.for %[[VAL_14:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { @@ -83,15 +83,15 @@ // CHECK-MIR: } // CHECK-LIR-LABEL: func @matvec( -// CHECK-LIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, +// CHECK-LIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, // CHECK-LIR-SAME: %[[VAL_1:.*]]: memref<64xf64>, // CHECK-LIR-SAME: %[[VAL_2:.*]]: memref<32xf64>) -> memref<32xf64> { // CHECK-LIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-LIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-LIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-LIR-DAG: %[[VAL_6:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK-LIR-DAG: %[[VAL_7:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK-LIR-DAG: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref +// CHECK-LIR-DAG: %[[VAL_6:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref +// CHECK-LIR-DAG: %[[VAL_7:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref +// CHECK-LIR-DAG: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref // CHECK-LIR: 
scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK-LIR-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref // CHECK-LIR-DAG: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir index 4d2a3d45aae845..a987d59f677313 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir @@ -53,15 +53,15 @@ // CHECK-HIR: } // CHECK-MIR-LABEL: func @matvec( -// CHECK-MIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, +// CHECK-MIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, // CHECK-MIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, // CHECK-MIR-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-MIR-DAG: %[[VAL_3:.*]] = arith.constant 64 : index // CHECK-MIR-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-MIR-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-MIR-DAG: %[[VAL_7:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK-MIR-DAG: %[[VAL_8:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK-MIR-DAG: %[[VAL_9:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref +// CHECK-MIR-DAG: %[[VAL_7:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref +// CHECK-MIR-DAG: %[[VAL_8:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref +// CHECK-MIR-DAG: %[[VAL_9:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref // CHECK-MIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64> // CHECK-MIR-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> // CHECK-MIR: scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { @@ -83,15 +83,15 @@ // CHECK-MIR: } // CHECK-LIR-LABEL: func @matvec( -// CHECK-LIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, +// CHECK-LIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, // CHECK-LIR-SAME: %[[VAL_1:.*]]: memref<64xf64>, // CHECK-LIR-SAME: %[[VAL_2:.*]]: memref<32xf64>) -> memref<32xf64> { // CHECK-LIR-DAG: %[[VAL_3:.*]] = arith.constant 64 : index // CHECK-LIR-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-LIR-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-LIR-DAG: %[[VAL_7:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK-LIR-DAG: %[[VAL_8:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref -// CHECK-LIR-DAG: %[[VAL_9:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref +// CHECK-LIR-DAG: %[[VAL_7:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref +// CHECK-LIR-DAG: %[[VAL_8:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref +// CHECK-LIR-DAG: %[[VAL_9:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref // CHECK-LIR: scf.for %[[VAL_13:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK-LIR: %[[VAL_14:.*]] = memref.load %[[VAL_1]]{{\[}}%[[VAL_13]]] : memref<64xf64> // CHECK-LIR: %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir index dc81acff504239..0e09f658fa15ca 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir @@ -52,15 +52,15 @@ // 
CHECK-HIR: } // CHECK-MIR-LABEL: func @matvec( -// CHECK-MIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, +// CHECK-MIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, // CHECK-MIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, // CHECK-MIR-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-MIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-MIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-MIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-MIR: %[[VAL_6:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK-MIR: %[[VAL_7:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK-MIR: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref +// CHECK-MIR: %[[VAL_6:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref +// CHECK-MIR: %[[VAL_7:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref +// CHECK-MIR: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref // CHECK-MIR: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64> // CHECK-MIR: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> // CHECK-MIR: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { @@ -83,15 +83,15 @@ // CHECK-MIR: } // CHECK-LIR-LABEL: func @matvec( -// CHECK-LIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, +// CHECK-LIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, // CHECK-LIR-SAME: %[[VAL_1:.*]]: memref<64xf64>, // CHECK-LIR-SAME: %[[VAL_2:.*]]: memref<32xf64>) -> memref<32xf64> { // CHECK-LIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-LIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-LIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-LIR: %[[VAL_6:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK-LIR: %[[VAL_7:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref -// CHECK-LIR: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref +// CHECK-LIR: %[[VAL_6:.*]] = call @sparsePositions0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref +// CHECK-LIR: %[[VAL_7:.*]] = call @sparseCoordinates0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref +// CHECK-LIR: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref // CHECK-LIR: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK-LIR-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref // CHECK-LIR-DAG: %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_5]] : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir index 42726d998ac7a7..02738a9e4544ac 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir @@ -48,7 +48,7 @@ // CHECK-HIR: } // // CHECK-MIR-LABEL: func @sparse_dynamic_dims( -// CHECK-MIR-SAME: %[[ARGA:.*]]: !llvm.ptr, +// CHECK-MIR-SAME: %[[ARGA:.*]]: !llvm.ptr, // CHECK-MIR-SAME: %[[ARGX:.*]]: tensor) -> tensor { // CHECK-MIR-DAG: %[[I0:.*]] = arith.constant 0 : index // CHECK-MIR-DAG: %[[I1:.*]] = arith.constant 1 : index @@ -56,7 +56,7 @@ // CHECK-MIR-DAG: %[[DimSize0:.*]] = call @sparseLvlSize(%[[ARGA]], %[[I0]]) // CHECK-MIR-DAG: %[[DimSize1:.*]] = call @sparseLvlSize(%[[ARGA]], %[[I1]]) // CHECK-MIR-DAG: %[[DimSize2:.*]] = call @sparseLvlSize(%[[ARGA]], %[[I2]]) -// CHECK-MIR-DAG: %[[VAL_8:.*]] = call 
@sparseValuesF32(%[[ARGA]]) : (!llvm.ptr) -> memref +// CHECK-MIR-DAG: %[[VAL_8:.*]] = call @sparseValuesF32(%[[ARGA]]) : (!llvm.ptr) -> memref // CHECK-MIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[ARGX]] : memref // CHECK-MIR: %[[VAL_11:.*]] = tensor.extract %[[ARGX]][] : tensor // CHECK-MIR: %[[VAL_12:.*]] = scf.for %[[D2:.*]] = %[[I0]] to %[[DimSize0]] step %[[I1]] iter_args(%[[VAL_14:.*]] = %[[VAL_11]]) -> (f32) { From fcb9a8a5eeeba0823a8cd3d364acf4b04b6dc7ab Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Tue, 31 Oct 2023 07:36:11 +0100 Subject: [PATCH 117/144] Reland "[MLIR][FuncToLLVM] Remove typed pointer support" (#70717) This relands 6a0f6dd8359b38340442b7e6b14629c1d6c54a81 that was reverted due to a missing integration test change. This commit removes the support for lowering Func to LLVM dialect with typed pointers. Typed pointers have been deprecated for a while now and it's planned to soon remove them from the LLVM dialect. Original PR: https://github.com/llvm/llvm-project/pull/70574 --- mlir/include/mlir/Conversion/Passes.td | 3 - mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp | 1 - .../FuncToLLVM/calling-convention.mlir | 4 +- .../FuncToLLVM/convert-argattrs.mlir | 2 +- .../FuncToLLVM/convert-data-layout.mlir | 2 +- .../Conversion/FuncToLLVM/convert-funcs.mlir | 2 +- .../emit-c-wrappers-for-external-callers.mlir | 2 +- ...mit-c-wrappers-for-external-functions.mlir | 2 +- .../FuncToLLVM/func-memref-return.mlir | 4 +- .../Conversion/FuncToLLVM/func-memref.mlir | 4 +- .../Conversion/FuncToLLVM/func-to-llvm.mlir | 4 +- mlir/test/Conversion/FuncToLLVM/invalid.mlir | 2 +- .../Conversion/FuncToLLVM/typed-pointers.mlir | 114 ------------------ .../mlir-vulkan-runner/mlir-vulkan-runner.cpp | 3 +- 14 files changed, 15 insertions(+), 134 deletions(-) delete mode 100644 mlir/test/Conversion/FuncToLLVM/typed-pointers.mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index cf6e545749ffc6..a2307bc243f615 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -409,9 +409,6 @@ def ConvertFuncToLLVMPass : Pass<"convert-func-to-llvm", "ModuleOp"> { Option<"indexBitwidth", "index-bitwidth", "unsigned", /*default=kDeriveIndexBitwidthFromDataLayout*/"0", "Bitwidth of the index type, 0 to use size of machine word">, - Option<"useOpaquePointers", "use-opaque-pointers", "bool", - /*default=*/"true", "Generate LLVM IR using opaque pointers " - "instead of typed pointers">, ]; } diff --git a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp index 3506f50916132d..3126d1dee32cbc 100644 --- a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp +++ b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp @@ -790,7 +790,6 @@ struct ConvertFuncToLLVMPass if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout) options.overrideIndexBitwidth(indexBitwidth); options.dataLayout = llvm::DataLayout(dataLayout); - options.useOpaquePointers = useOpaquePointers; LLVMTypeConverter typeConverter(&getContext(), options, &dataLayoutAnalysis); diff --git a/mlir/test/Conversion/FuncToLLVM/calling-convention.mlir b/mlir/test/Conversion/FuncToLLVM/calling-convention.mlir index 1ed67708875604..7cdb89e1f72d28 100644 --- a/mlir/test/Conversion/FuncToLLVM/calling-convention.mlir +++ b/mlir/test/Conversion/FuncToLLVM/calling-convention.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -finalize-memref-to-llvm='use-opaque-pointers=1' -llvm-request-c-wrappers -convert-func-to-llvm='use-opaque-pointers=1' 
-reconcile-unrealized-casts %s | FileCheck %s -// RUN: mlir-opt -finalize-memref-to-llvm='use-opaque-pointers=1' -convert-func-to-llvm='use-opaque-pointers=1' -reconcile-unrealized-casts %s | FileCheck %s --check-prefix=EMIT_C_ATTRIBUTE +// RUN: mlir-opt -finalize-memref-to-llvm -llvm-request-c-wrappers -convert-func-to-llvm -reconcile-unrealized-casts %s | FileCheck %s +// RUN: mlir-opt -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts %s | FileCheck %s --check-prefix=EMIT_C_ATTRIBUTE // This tests the default memref calling convention and the emission of C // wrappers. We don't need to separate runs because the wrapper-emission diff --git a/mlir/test/Conversion/FuncToLLVM/convert-argattrs.mlir b/mlir/test/Conversion/FuncToLLVM/convert-argattrs.mlir index 41aff17d86919f..85c7cbddfdbf63 100644 --- a/mlir/test/Conversion/FuncToLLVM/convert-argattrs.mlir +++ b/mlir/test/Conversion/FuncToLLVM/convert-argattrs.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -convert-func-to-llvm='use-opaque-pointers=1' %s | FileCheck %s +// RUN: mlir-opt -convert-func-to-llvm %s | FileCheck %s // CHECK-LABEL: func @check_attributes // CHECK-SAME: {dialect.a = true, dialect.b = 4 : i64} diff --git a/mlir/test/Conversion/FuncToLLVM/convert-data-layout.mlir b/mlir/test/Conversion/FuncToLLVM/convert-data-layout.mlir index fb33d4fdfbe7c9..0e7c16ec507998 100644 --- a/mlir/test/Conversion/FuncToLLVM/convert-data-layout.mlir +++ b/mlir/test/Conversion/FuncToLLVM/convert-data-layout.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -set-llvm-module-datalayout -convert-func-to-llvm='use-opaque-pointers=1' %s | FileCheck %s +// RUN: mlir-opt -set-llvm-module-datalayout -convert-func-to-llvm %s | FileCheck %s // RUN-32: mlir-opt -set-llvm-module-datalayout='data-layout=p:32:32:32' -convert-func-to-llvm='use-opaque-pointers=1' %s \ // RUN-32: | FileCheck %s diff --git a/mlir/test/Conversion/FuncToLLVM/convert-funcs.mlir b/mlir/test/Conversion/FuncToLLVM/convert-funcs.mlir index 9fe5ad5cdda65f..765d8469f3c561 100644 --- a/mlir/test/Conversion/FuncToLLVM/convert-funcs.mlir +++ b/mlir/test/Conversion/FuncToLLVM/convert-funcs.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -convert-func-to-llvm='use-opaque-pointers=1' -split-input-file -verify-diagnostics %s | FileCheck %s +// RUN: mlir-opt -convert-func-to-llvm -split-input-file -verify-diagnostics %s | FileCheck %s //CHECK: llvm.func @second_order_arg(!llvm.ptr) func.func private @second_order_arg(%arg0 : () -> ()) diff --git a/mlir/test/Conversion/FuncToLLVM/emit-c-wrappers-for-external-callers.mlir b/mlir/test/Conversion/FuncToLLVM/emit-c-wrappers-for-external-callers.mlir index dd474e14011057..826ca9540ae565 100644 --- a/mlir/test/Conversion/FuncToLLVM/emit-c-wrappers-for-external-callers.mlir +++ b/mlir/test/Conversion/FuncToLLVM/emit-c-wrappers-for-external-callers.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -llvm-request-c-wrappers -convert-func-to-llvm='use-opaque-pointers=1' %s | FileCheck %s +// RUN: mlir-opt -llvm-request-c-wrappers -convert-func-to-llvm %s | FileCheck %s // CHECK: llvm.func @res_attrs_with_memref_return() -> (!llvm.struct{{.*}} {test.returnOne}) // CHECK-LABEL: llvm.func @_mlir_ciface_res_attrs_with_memref_return diff --git a/mlir/test/Conversion/FuncToLLVM/emit-c-wrappers-for-external-functions.mlir b/mlir/test/Conversion/FuncToLLVM/emit-c-wrappers-for-external-functions.mlir index 027d29b0bf079a..28c2638c7be519 100644 --- a/mlir/test/Conversion/FuncToLLVM/emit-c-wrappers-for-external-functions.mlir +++ 
b/mlir/test/Conversion/FuncToLLVM/emit-c-wrappers-for-external-functions.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -llvm-request-c-wrappers -convert-func-to-llvm='use-opaque-pointers=1' %s | FileCheck %s +// RUN: mlir-opt -llvm-request-c-wrappers -convert-func-to-llvm %s | FileCheck %s // CHECK: llvm.func private @res_attrs_with_memref_return() -> (!llvm.struct{{.*}} {test.returnOne}) // CHECK-LABEL: llvm.func @_mlir_ciface_res_attrs_with_memref_return diff --git a/mlir/test/Conversion/FuncToLLVM/func-memref-return.mlir b/mlir/test/Conversion/FuncToLLVM/func-memref-return.mlir index b584d4ce28f52a..91ef571cb3bf71 100644 --- a/mlir/test/Conversion/FuncToLLVM/func-memref-return.mlir +++ b/mlir/test/Conversion/FuncToLLVM/func-memref-return.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt -convert-func-to-llvm='use-opaque-pointers=1' -reconcile-unrealized-casts %s | FileCheck %s +// RUN: mlir-opt -convert-func-to-llvm -reconcile-unrealized-casts %s | FileCheck %s -// RUN: mlir-opt -convert-func-to-llvm='use-bare-ptr-memref-call-conv=1 use-opaque-pointers=1' %s | FileCheck %s --check-prefix=BAREPTR +// RUN: mlir-opt -convert-func-to-llvm='use-bare-ptr-memref-call-conv=1' %s | FileCheck %s --check-prefix=BAREPTR // RUN: mlir-opt -transform-interpreter %s | FileCheck %s --check-prefix=BAREPTR diff --git a/mlir/test/Conversion/FuncToLLVM/func-memref.mlir b/mlir/test/Conversion/FuncToLLVM/func-memref.mlir index b61287643dca94..d44a07bdcc9ab0 100644 --- a/mlir/test/Conversion/FuncToLLVM/func-memref.mlir +++ b/mlir/test/Conversion/FuncToLLVM/func-memref.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm),convert-func-to-llvm{use-opaque-pointers=1},reconcile-unrealized-casts)" -split-input-file %s | FileCheck %s -// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1 use-opaque-pointers=1},reconcile-unrealized-casts)" -split-input-file %s | FileCheck %s --check-prefix=BAREPTR +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" -split-input-file %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts)" -split-input-file %s | FileCheck %s --check-prefix=BAREPTR // BAREPTR-LABEL: func @check_noalias // BAREPTR-SAME: %{{.*}}: !llvm.ptr {llvm.noalias}, %{{.*}}: !llvm.ptr {llvm.noalias} diff --git a/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir b/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir index 8254e77c8628bd..9cc6bbf0873abd 100644 --- a/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir +++ b/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm{use-opaque-pointers=1},reconcile-unrealized-casts)" %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" %s | FileCheck %s -// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-math-to-llvm,convert-arith-to-llvm{index-bitwidth=32}),convert-func-to-llvm{index-bitwidth=32 use-opaque-pointers=1},reconcile-unrealized-casts)" %s | FileCheck --check-prefix=CHECK32 %s +// RUN: mlir-opt 
-pass-pipeline="builtin.module(func.func(convert-math-to-llvm,convert-arith-to-llvm{index-bitwidth=32}),convert-func-to-llvm{index-bitwidth=32},reconcile-unrealized-casts)" %s | FileCheck --check-prefix=CHECK32 %s // RUN: mlir-opt -transform-interpreter %s | FileCheck --check-prefix=CHECK32 %s diff --git a/mlir/test/Conversion/FuncToLLVM/invalid.mlir b/mlir/test/Conversion/FuncToLLVM/invalid.mlir index 798d0a8519efeb..e70252ff87ed13 100644 --- a/mlir/test/Conversion/FuncToLLVM/invalid.mlir +++ b/mlir/test/Conversion/FuncToLLVM/invalid.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-func-to-llvm='use-opaque-pointers=1' -verify-diagnostics -split-input-file +// RUN: mlir-opt %s -convert-func-to-llvm -verify-diagnostics -split-input-file // Should not crash on unsupported types in function signatures. func.func private @unsupported_signature() -> tensor<10 x i32> diff --git a/mlir/test/Conversion/FuncToLLVM/typed-pointers.mlir b/mlir/test/Conversion/FuncToLLVM/typed-pointers.mlir deleted file mode 100644 index 7b3b816cc38bb1..00000000000000 --- a/mlir/test/Conversion/FuncToLLVM/typed-pointers.mlir +++ /dev/null @@ -1,114 +0,0 @@ -// RUN: mlir-opt -convert-func-to-llvm='use-opaque-pointers=0' -split-input-file %s | FileCheck %s - -//CHECK: llvm.func @second_order_arg(!llvm.ptr>) -func.func private @second_order_arg(%arg0 : () -> ()) - -//CHECK: llvm.func @second_order_result() -> !llvm.ptr> -func.func private @second_order_result() -> (() -> ()) - -//CHECK: llvm.func @second_order_multi_result() -> !llvm.struct<(ptr>, ptr>, ptr>)> -func.func private @second_order_multi_result() -> (() -> (i32), () -> (i64), () -> (f32)) - -//CHECK: llvm.func @third_order(!llvm.ptr> (ptr>)>>) -> !llvm.ptr> (ptr>)>> -func.func private @third_order(%arg0 : (() -> ()) -> (() -> ())) -> ((() -> ()) -> (() -> ())) - -//CHECK: llvm.func @fifth_order_left(!llvm.ptr>)>>)>>)>>) -func.func private @fifth_order_left(%arg0: (((() -> ()) -> ()) -> ()) -> ()) - -//CHECK: llvm.func @fifth_order_right(!llvm.ptr> ()>> ()>> ()>>) -func.func private @fifth_order_right(%arg0: () -> (() -> (() -> (() -> ())))) - -// Check that memrefs are converted to argument packs if appear as function arguments. -// CHECK: llvm.func @memref_call_conv(!llvm.ptr, !llvm.ptr, i64, i64, i64) -func.func private @memref_call_conv(%arg0: memref) - -// Same in nested functions. 
-// CHECK: llvm.func @memref_call_conv_nested(!llvm.ptr, ptr, i64, i64, i64)>>) -func.func private @memref_call_conv_nested(%arg0: (memref) -> ()) - -//CHECK-LABEL: llvm.func @pass_through(%arg0: !llvm.ptr>) -> !llvm.ptr> { -func.func @pass_through(%arg0: () -> ()) -> (() -> ()) { -// CHECK-NEXT: llvm.br ^bb1(%arg0 : !llvm.ptr>) - cf.br ^bb1(%arg0 : () -> ()) - -//CHECK-NEXT: ^bb1(%0: !llvm.ptr>): -^bb1(%bbarg: () -> ()): -// CHECK-NEXT: llvm.return %0 : !llvm.ptr> - return %bbarg : () -> () -} - -// CHECK-LABEL: llvm.func @indirect_call(%arg0: !llvm.ptr>, %arg1: f32) -> i32 { -func.func @indirect_call(%arg0: (f32) -> i32, %arg1: f32) -> i32 { -// CHECK-NEXT: %0 = llvm.call %arg0(%arg1) : !llvm.ptr>, (f32) -> i32 - %0 = call_indirect %arg0(%arg1) : (f32) -> i32 -// CHECK-NEXT: llvm.return %0 : i32 - return %0 : i32 -} - -// CHECK-LABEL: llvm.func @get_i64() -> i64 -func.func private @get_i64() -> (i64) -// CHECK-LABEL: llvm.func @get_f32() -> f32 -func.func private @get_f32() -> (f32) -// CHECK-LABEL: llvm.func @get_memref() -> !llvm.struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)> -func.func private @get_memref() -> (memref<42x?x10x?xf32>) - -// CHECK-LABEL: llvm.func @multireturn() -> !llvm.struct<(i64, f32, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> { -func.func @multireturn() -> (i64, f32, memref<42x?x10x?xf32>) { -^bb0: -// CHECK-NEXT: {{.*}} = llvm.call @get_i64() : () -> i64 -// CHECK-NEXT: {{.*}} = llvm.call @get_f32() : () -> f32 -// CHECK-NEXT: {{.*}} = llvm.call @get_memref() : () -> !llvm.struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)> - %0 = call @get_i64() : () -> (i64) - %1 = call @get_f32() : () -> (f32) - %2 = call @get_memref() : () -> (memref<42x?x10x?xf32>) -// CHECK-NEXT: {{.*}} = llvm.mlir.undef : !llvm.struct<(i64, f32, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> -// CHECK-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm.struct<(i64, f32, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> -// CHECK-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm.struct<(i64, f32, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> -// CHECK-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.struct<(i64, f32, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> -// CHECK-NEXT: llvm.return {{.*}} : !llvm.struct<(i64, f32, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> - return %0, %1, %2 : i64, f32, memref<42x?x10x?xf32> -} - -//===========================================================================// -// Calling convention on returning unranked memrefs. 
-// IR below produced by running -finalize-memref-to-llvm without opaque -// pointers on calling-convention.mlir -//===========================================================================// - -func.func @return_var_memref(%arg0: memref<4x3xf32>) -> memref<*xf32> attributes {llvm.emit_c_interface} { - %0 = builtin.unrealized_conversion_cast %arg0 : memref<4x3xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %1 = llvm.mlir.constant(1 : index) : i64 - %2 = llvm.alloca %1 x !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>> - llvm.store %0, %2 : !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>> - %3 = llvm.bitcast %2 : !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>> to !llvm.ptr - %4 = llvm.mlir.constant(2 : index) : i64 - %5 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> - %6 = llvm.insertvalue %4, %5[0] : !llvm.struct<(i64, ptr)> - %7 = llvm.insertvalue %3, %6[1] : !llvm.struct<(i64, ptr)> - %8 = builtin.unrealized_conversion_cast %7 : !llvm.struct<(i64, ptr)> to memref<*xf32> - return %8 : memref<*xf32> -} - -// Check that the result memref is passed as parameter -// CHECK-LABEL: @_mlir_ciface_return_var_memref -// CHECK-SAME: (%{{.*}}: !llvm.ptr)>>, %{{.*}}: !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>>) - -func.func @return_two_var_memref(%arg0: memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>) attributes {llvm.emit_c_interface} { - %0 = builtin.unrealized_conversion_cast %arg0 : memref<4x3xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %1 = llvm.mlir.constant(1 : index) : i64 - %2 = llvm.alloca %1 x !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> : (i64) -> !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>> - llvm.store %0, %2 : !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>> - %3 = llvm.bitcast %2 : !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>> to !llvm.ptr - %4 = llvm.mlir.constant(2 : index) : i64 - %5 = llvm.mlir.undef : !llvm.struct<(i64, ptr)> - %6 = llvm.insertvalue %4, %5[0] : !llvm.struct<(i64, ptr)> - %7 = llvm.insertvalue %3, %6[1] : !llvm.struct<(i64, ptr)> - %8 = builtin.unrealized_conversion_cast %7 : !llvm.struct<(i64, ptr)> to memref<*xf32> - return %8, %8 : memref<*xf32>, memref<*xf32> -} - -// Check that the result memrefs are passed as parameter -// CHECK-LABEL: @_mlir_ciface_return_two_var_memref -// CHECK-SAME: (%{{.*}}: !llvm.ptr)>, struct<(i64, ptr)>)>>, -// CHECK-SAME: %{{.*}}: !llvm.ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>>) - diff --git a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp index d3ec890bf48590..5b8e236b4618f5 100644 --- a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp +++ b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp @@ -77,8 +77,7 @@ static LogicalResult runMLIRPasses(Operation *op, ConvertFuncToLLVMPassOptions funcToLLVMOptions{}; funcToLLVMOptions.indexBitwidth = DataLayout(module).getTypeSizeInBits(IndexType::get(module.getContext())); - passManager.addPass( - createConvertFuncToLLVMPass(funcToLLVMOptions)); + passManager.addPass(createConvertFuncToLLVMPass(funcToLLVMOptions)); passManager.addPass(createReconcileUnrealizedCastsPass()); passManager.addPass(createConvertVulkanLaunchFuncToVulkanCallsPass()); From 749f37083a2779628b49bdcad5e0109cac331803 Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Tue, 31 Oct 2023 07:36:32 +0100 Subject: [PATCH 118/144] [MLIR][AsyncToLLVM] Remove 
typed pointer support (#70731) This commit removes the support for lowering Async to LLVM dialect with typed pointers. Typed pointers have been deprecated for a while now and it's planned to soon remove them from the LLVM dialect. Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502 --- mlir/include/mlir/Conversion/Passes.td | 5 - .../mlir/Dialect/LLVMIR/FunctionCallUtils.h | 5 +- .../Conversion/AsyncToLLVM/AsyncToLLVM.cpp | 243 ++++++------------ .../AsyncToLLVM/convert-coro-to-llvm.mlir | 2 +- .../AsyncToLLVM/convert-runtime-to-llvm.mlir | 2 +- .../AsyncToLLVM/convert-to-llvm.mlir | 2 +- .../AsyncToLLVM/typed-pointers.mlir | 138 ---------- 7 files changed, 80 insertions(+), 317 deletions(-) delete mode 100644 mlir/test/Conversion/AsyncToLLVM/typed-pointers.mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index a2307bc243f615..5423be0e91d0ac 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -191,11 +191,6 @@ def ConvertAsyncToLLVMPass : Pass<"convert-async-to-llvm", "ModuleOp"> { "LLVM::LLVMDialect", "func::FuncDialect", ]; - let options = [ - Option<"useOpaquePointers", "use-opaque-pointers", "bool", - /*default=*/"true", "Generate LLVM IR using opaque pointers " - "instead of typed pointers">, - ]; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h index 9e69717f471bce..05320c0c718690 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h +++ b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h @@ -52,8 +52,9 @@ LLVM::LLVMFuncOp lookupOrCreatePrintNewlineFn(ModuleOp moduleOp); LLVM::LLVMFuncOp lookupOrCreateMallocFn(ModuleOp moduleOp, Type indexType, bool opaquePointers); LLVM::LLVMFuncOp lookupOrCreateAlignedAllocFn(ModuleOp moduleOp, Type indexType, - bool opaquePointers); -LLVM::LLVMFuncOp lookupOrCreateFreeFn(ModuleOp moduleOp, bool opaquePointers); + bool opaquePointers = true); +LLVM::LLVMFuncOp lookupOrCreateFreeFn(ModuleOp moduleOp, + bool opaquePointers = true); LLVM::LLVMFuncOp lookupOrCreateGenericAllocFn(ModuleOp moduleOp, Type indexType, bool opaquePointers); LLVM::LLVMFuncOp lookupOrCreateGenericAlignedAllocFn(ModuleOp moduleOp, diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp index d9ea60a6749d92..0ab53ce7e3327e 100644 --- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp +++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp @@ -76,20 +76,16 @@ namespace { /// lowering all async data types become opaque pointers at runtime. struct AsyncAPI { // All async types are lowered to opaque LLVM pointers at runtime. 
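  // As a minimal sketch of the conversion (illustrative function name):
  //   func.func private @f(!async.token, !async.value<f32>, !async.group)
  // lowers to:
  //   func.func private @f(!llvm.ptr, !llvm.ptr, !llvm.ptr)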
- static LLVM::LLVMPointerType opaquePointerType(MLIRContext *ctx, - bool useLLVMOpaquePointers) { - if (useLLVMOpaquePointers) - return LLVM::LLVMPointerType::get(ctx); - return LLVM::LLVMPointerType::get(IntegerType::get(ctx, 8)); + static LLVM::LLVMPointerType opaquePointerType(MLIRContext *ctx) { + return LLVM::LLVMPointerType::get(ctx); } static LLVM::LLVMTokenType tokenType(MLIRContext *ctx) { return LLVM::LLVMTokenType::get(ctx); } - static FunctionType addOrDropRefFunctionType(MLIRContext *ctx, - bool useLLVMOpaquePointers) { - auto ref = opaquePointerType(ctx, useLLVMOpaquePointers); + static FunctionType addOrDropRefFunctionType(MLIRContext *ctx) { + auto ref = opaquePointerType(ctx); auto count = IntegerType::get(ctx, 64); return FunctionType::get(ctx, {ref, count}, {}); } @@ -98,10 +94,9 @@ struct AsyncAPI { return FunctionType::get(ctx, {}, {TokenType::get(ctx)}); } - static FunctionType createValueFunctionType(MLIRContext *ctx, - bool useLLVMOpaquePointers) { + static FunctionType createValueFunctionType(MLIRContext *ctx) { auto i64 = IntegerType::get(ctx, 64); - auto value = opaquePointerType(ctx, useLLVMOpaquePointers); + auto value = opaquePointerType(ctx); return FunctionType::get(ctx, {i64}, {value}); } @@ -110,20 +105,17 @@ struct AsyncAPI { return FunctionType::get(ctx, {i64}, {GroupType::get(ctx)}); } - static FunctionType getValueStorageFunctionType(MLIRContext *ctx, - bool useLLVMOpaquePointers) { - auto value = opaquePointerType(ctx, useLLVMOpaquePointers); - auto storage = opaquePointerType(ctx, useLLVMOpaquePointers); - return FunctionType::get(ctx, {value}, {storage}); + static FunctionType getValueStorageFunctionType(MLIRContext *ctx) { + auto ptrType = opaquePointerType(ctx); + return FunctionType::get(ctx, {ptrType}, {ptrType}); } static FunctionType emplaceTokenFunctionType(MLIRContext *ctx) { return FunctionType::get(ctx, {TokenType::get(ctx)}, {}); } - static FunctionType emplaceValueFunctionType(MLIRContext *ctx, - bool useLLVMOpaquePointers) { - auto value = opaquePointerType(ctx, useLLVMOpaquePointers); + static FunctionType emplaceValueFunctionType(MLIRContext *ctx) { + auto value = opaquePointerType(ctx); return FunctionType::get(ctx, {value}, {}); } @@ -131,9 +123,8 @@ struct AsyncAPI { return FunctionType::get(ctx, {TokenType::get(ctx)}, {}); } - static FunctionType setValueErrorFunctionType(MLIRContext *ctx, - bool useLLVMOpaquePointers) { - auto value = opaquePointerType(ctx, useLLVMOpaquePointers); + static FunctionType setValueErrorFunctionType(MLIRContext *ctx) { + auto value = opaquePointerType(ctx); return FunctionType::get(ctx, {value}, {}); } @@ -142,9 +133,8 @@ struct AsyncAPI { return FunctionType::get(ctx, {TokenType::get(ctx)}, {i1}); } - static FunctionType isValueErrorFunctionType(MLIRContext *ctx, - bool useLLVMOpaquePointers) { - auto value = opaquePointerType(ctx, useLLVMOpaquePointers); + static FunctionType isValueErrorFunctionType(MLIRContext *ctx) { + auto value = opaquePointerType(ctx); auto i1 = IntegerType::get(ctx, 1); return FunctionType::get(ctx, {value}, {i1}); } @@ -158,9 +148,8 @@ struct AsyncAPI { return FunctionType::get(ctx, {TokenType::get(ctx)}, {}); } - static FunctionType awaitValueFunctionType(MLIRContext *ctx, - bool useLLVMOpaquePointers) { - auto value = opaquePointerType(ctx, useLLVMOpaquePointers); + static FunctionType awaitValueFunctionType(MLIRContext *ctx) { + auto value = opaquePointerType(ctx); return FunctionType::get(ctx, {value}, {}); } @@ -168,16 +157,9 @@ struct AsyncAPI { return 
FunctionType::get(ctx, {GroupType::get(ctx)}, {}); } - static FunctionType executeFunctionType(MLIRContext *ctx, - bool useLLVMOpaquePointers) { - auto hdl = opaquePointerType(ctx, useLLVMOpaquePointers); - Type resume; - if (useLLVMOpaquePointers) - resume = LLVM::LLVMPointerType::get(ctx); - else - resume = LLVM::LLVMPointerType::get( - resumeFunctionType(ctx, useLLVMOpaquePointers)); - return FunctionType::get(ctx, {hdl, resume}, {}); + static FunctionType executeFunctionType(MLIRContext *ctx) { + auto ptrType = opaquePointerType(ctx); + return FunctionType::get(ctx, {ptrType, ptrType}, {}); } static FunctionType addTokenToGroupFunctionType(MLIRContext *ctx) { @@ -186,43 +168,19 @@ struct AsyncAPI { {i64}); } - static FunctionType - awaitTokenAndExecuteFunctionType(MLIRContext *ctx, - bool useLLVMOpaquePointers) { - auto hdl = opaquePointerType(ctx, useLLVMOpaquePointers); - Type resume; - if (useLLVMOpaquePointers) - resume = LLVM::LLVMPointerType::get(ctx); - else - resume = LLVM::LLVMPointerType::get( - resumeFunctionType(ctx, useLLVMOpaquePointers)); - return FunctionType::get(ctx, {TokenType::get(ctx), hdl, resume}, {}); + static FunctionType awaitTokenAndExecuteFunctionType(MLIRContext *ctx) { + auto ptrType = opaquePointerType(ctx); + return FunctionType::get(ctx, {TokenType::get(ctx), ptrType, ptrType}, {}); } - static FunctionType - awaitValueAndExecuteFunctionType(MLIRContext *ctx, - bool useLLVMOpaquePointers) { - auto value = opaquePointerType(ctx, useLLVMOpaquePointers); - auto hdl = opaquePointerType(ctx, useLLVMOpaquePointers); - Type resume; - if (useLLVMOpaquePointers) - resume = LLVM::LLVMPointerType::get(ctx); - else - resume = LLVM::LLVMPointerType::get( - resumeFunctionType(ctx, useLLVMOpaquePointers)); - return FunctionType::get(ctx, {value, hdl, resume}, {}); + static FunctionType awaitValueAndExecuteFunctionType(MLIRContext *ctx) { + auto ptrType = opaquePointerType(ctx); + return FunctionType::get(ctx, {ptrType, ptrType, ptrType}, {}); } - static FunctionType - awaitAllAndExecuteFunctionType(MLIRContext *ctx, bool useLLVMOpaquePointers) { - auto hdl = opaquePointerType(ctx, useLLVMOpaquePointers); - Type resume; - if (useLLVMOpaquePointers) - resume = LLVM::LLVMPointerType::get(ctx); - else - resume = LLVM::LLVMPointerType::get( - resumeFunctionType(ctx, useLLVMOpaquePointers)); - return FunctionType::get(ctx, {GroupType::get(ctx), hdl, resume}, {}); + static FunctionType awaitAllAndExecuteFunctionType(MLIRContext *ctx) { + auto ptrType = opaquePointerType(ctx); + return FunctionType::get(ctx, {GroupType::get(ctx), ptrType, ptrType}, {}); } static FunctionType getNumWorkerThreads(MLIRContext *ctx) { @@ -230,17 +188,16 @@ struct AsyncAPI { } // Auxiliary coroutine resume intrinsic wrapper. - static Type resumeFunctionType(MLIRContext *ctx, bool useLLVMOpaquePointers) { + static Type resumeFunctionType(MLIRContext *ctx) { auto voidTy = LLVM::LLVMVoidType::get(ctx); - auto ptrType = opaquePointerType(ctx, useLLVMOpaquePointers); + auto ptrType = opaquePointerType(ctx); return LLVM::LLVMFunctionType::get(voidTy, {ptrType}, false); } }; } // namespace /// Adds Async Runtime C API declarations to the module. 
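/// As a rough sketch, the declaration this adds for the value-create API
/// (per AsyncAPI::createValueFunctionType) looks like:
///   func.func private @mlirAsyncRuntimeCreateValue(i64) -> !llvm.ptr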
-static void addAsyncRuntimeApiDeclarations(ModuleOp module, - bool useLLVMOpaquePointers) { +static void addAsyncRuntimeApiDeclarations(ModuleOp module) { auto builder = ImplicitLocOpBuilder::atBlockEnd(module.getLoc(), module.getBody()); @@ -251,39 +208,30 @@ static void addAsyncRuntimeApiDeclarations(ModuleOp module, }; MLIRContext *ctx = module.getContext(); - addFuncDecl(kAddRef, - AsyncAPI::addOrDropRefFunctionType(ctx, useLLVMOpaquePointers)); - addFuncDecl(kDropRef, - AsyncAPI::addOrDropRefFunctionType(ctx, useLLVMOpaquePointers)); + addFuncDecl(kAddRef, AsyncAPI::addOrDropRefFunctionType(ctx)); + addFuncDecl(kDropRef, AsyncAPI::addOrDropRefFunctionType(ctx)); addFuncDecl(kCreateToken, AsyncAPI::createTokenFunctionType(ctx)); - addFuncDecl(kCreateValue, - AsyncAPI::createValueFunctionType(ctx, useLLVMOpaquePointers)); + addFuncDecl(kCreateValue, AsyncAPI::createValueFunctionType(ctx)); addFuncDecl(kCreateGroup, AsyncAPI::createGroupFunctionType(ctx)); addFuncDecl(kEmplaceToken, AsyncAPI::emplaceTokenFunctionType(ctx)); - addFuncDecl(kEmplaceValue, - AsyncAPI::emplaceValueFunctionType(ctx, useLLVMOpaquePointers)); + addFuncDecl(kEmplaceValue, AsyncAPI::emplaceValueFunctionType(ctx)); addFuncDecl(kSetTokenError, AsyncAPI::setTokenErrorFunctionType(ctx)); - addFuncDecl(kSetValueError, - AsyncAPI::setValueErrorFunctionType(ctx, useLLVMOpaquePointers)); + addFuncDecl(kSetValueError, AsyncAPI::setValueErrorFunctionType(ctx)); addFuncDecl(kIsTokenError, AsyncAPI::isTokenErrorFunctionType(ctx)); - addFuncDecl(kIsValueError, - AsyncAPI::isValueErrorFunctionType(ctx, useLLVMOpaquePointers)); + addFuncDecl(kIsValueError, AsyncAPI::isValueErrorFunctionType(ctx)); addFuncDecl(kIsGroupError, AsyncAPI::isGroupErrorFunctionType(ctx)); addFuncDecl(kAwaitToken, AsyncAPI::awaitTokenFunctionType(ctx)); - addFuncDecl(kAwaitValue, - AsyncAPI::awaitValueFunctionType(ctx, useLLVMOpaquePointers)); + addFuncDecl(kAwaitValue, AsyncAPI::awaitValueFunctionType(ctx)); addFuncDecl(kAwaitGroup, AsyncAPI::awaitGroupFunctionType(ctx)); - addFuncDecl(kExecute, - AsyncAPI::executeFunctionType(ctx, useLLVMOpaquePointers)); - addFuncDecl(kGetValueStorage, AsyncAPI::getValueStorageFunctionType( - ctx, useLLVMOpaquePointers)); + addFuncDecl(kExecute, AsyncAPI::executeFunctionType(ctx)); + addFuncDecl(kGetValueStorage, AsyncAPI::getValueStorageFunctionType(ctx)); addFuncDecl(kAddTokenToGroup, AsyncAPI::addTokenToGroupFunctionType(ctx)); - addFuncDecl(kAwaitTokenAndExecute, AsyncAPI::awaitTokenAndExecuteFunctionType( - ctx, useLLVMOpaquePointers)); - addFuncDecl(kAwaitValueAndExecute, AsyncAPI::awaitValueAndExecuteFunctionType( - ctx, useLLVMOpaquePointers)); - addFuncDecl(kAwaitAllAndExecute, AsyncAPI::awaitAllAndExecuteFunctionType( - ctx, useLLVMOpaquePointers)); + addFuncDecl(kAwaitTokenAndExecute, + AsyncAPI::awaitTokenAndExecuteFunctionType(ctx)); + addFuncDecl(kAwaitValueAndExecute, + AsyncAPI::awaitValueAndExecuteFunctionType(ctx)); + addFuncDecl(kAwaitAllAndExecute, + AsyncAPI::awaitAllAndExecuteFunctionType(ctx)); addFuncDecl(kGetNumWorkerThreads, AsyncAPI::getNumWorkerThreads(ctx)); } @@ -296,7 +244,7 @@ static constexpr const char *kResume = "__resume"; /// A function that takes a coroutine handle and calls a `llvm.coro.resume` /// intrinsics. We need this function to be able to pass it to the async /// runtime execute API. 
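/// A sketch of the emitted wrapper (assuming opaque pointers):
///   llvm.func @__resume(%hdl: !llvm.ptr) {
///     llvm.intr.coro.resume %hdl : !llvm.ptr
///     llvm.return
///   }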
-static void addResumeFunction(ModuleOp module, bool useOpaquePointers) { +static void addResumeFunction(ModuleOp module) { if (module.lookupSymbol(kResume)) return; @@ -305,11 +253,7 @@ static void addResumeFunction(ModuleOp module, bool useOpaquePointers) { auto moduleBuilder = ImplicitLocOpBuilder::atBlockEnd(loc, module.getBody()); auto voidTy = LLVM::LLVMVoidType::get(ctx); - Type ptrType; - if (useOpaquePointers) - ptrType = LLVM::LLVMPointerType::get(ctx); - else - ptrType = LLVM::LLVMPointerType::get(IntegerType::get(ctx, 8)); + Type ptrType = AsyncAPI::opaquePointerType(ctx); auto resumeOp = moduleBuilder.create( kResume, LLVM::LLVMFunctionType::get(voidTy, {ptrType})); @@ -330,15 +274,10 @@ namespace { /// AsyncRuntimeTypeConverter only converts types from the Async dialect to /// their runtime type (opaque pointers) and does not convert any other types. class AsyncRuntimeTypeConverter : public TypeConverter { - bool llvmOpaquePointers = false; - public: - AsyncRuntimeTypeConverter(const LowerToLLVMOptions &options) - : llvmOpaquePointers(options.useOpaquePointers) { + AsyncRuntimeTypeConverter(const LowerToLLVMOptions &options) { addConversion([](Type type) { return type; }); - addConversion([this](Type type) { - return convertAsyncTypes(type, llvmOpaquePointers); - }); + addConversion([](Type type) { return convertAsyncTypes(type); }); // Use UnrealizedConversionCast as the bridge so that we don't need to pull // in patterns for other dialects. @@ -352,28 +291,14 @@ class AsyncRuntimeTypeConverter : public TypeConverter { addTargetMaterialization(addUnrealizedCast); } - /// Returns whether LLVM opaque pointers should be used instead of typed - /// pointers. - bool useOpaquePointers() const { return llvmOpaquePointers; } - - /// Creates an LLVM pointer type which may either be a typed pointer or an - /// opaque pointer, depending on what options the converter was constructed - /// with. - LLVM::LLVMPointerType getPointerType(Type elementType) const { - if (llvmOpaquePointers) - return LLVM::LLVMPointerType::get(elementType.getContext()); - return LLVM::LLVMPointerType::get(elementType); - } - - static std::optional convertAsyncTypes(Type type, - bool useOpaquePointers) { + static std::optional convertAsyncTypes(Type type) { if (isa(type)) - return AsyncAPI::opaquePointerType(type.getContext(), useOpaquePointers); + return AsyncAPI::opaquePointerType(type.getContext()); if (isa(type)) return AsyncAPI::tokenType(type.getContext()); if (isa(type)) - return AsyncAPI::opaquePointerType(type.getContext(), useOpaquePointers); + return AsyncAPI::opaquePointerType(type.getContext()); return std::nullopt; } @@ -414,8 +339,7 @@ class CoroIdOpConversion : public AsyncOpConversionPattern { matchAndRewrite(CoroIdOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { auto token = AsyncAPI::tokenType(op->getContext()); - auto ptrType = AsyncAPI::opaquePointerType( - op->getContext(), getTypeConverter()->useOpaquePointers()); + auto ptrType = AsyncAPI::opaquePointerType(op->getContext()); auto loc = op->getLoc(); // Constants for initializing coroutine frame. 
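// The begin lowering below then allocates and enters the coroutine frame;
// roughly (a sketch, assuming the aligned_alloc path):
//   %size  = llvm.intr.coro.size : i64
//   %align = llvm.intr.coro.align : i64
//   %mem   = llvm.call @aligned_alloc(%align, %size) : (i64, i64) -> !llvm.ptr
//   %hdl   = llvm.intr.coro.begin %id, %mem : !llvm.ptr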
@@ -444,8 +368,7 @@ class CoroBeginOpConversion : public AsyncOpConversionPattern { LogicalResult matchAndRewrite(CoroBeginOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - auto ptrType = AsyncAPI::opaquePointerType( - op->getContext(), getTypeConverter()->useOpaquePointers()); + auto ptrType = AsyncAPI::opaquePointerType(op->getContext()); auto loc = op->getLoc(); // Get coroutine frame size: @llvm.coro.size.i64. @@ -472,8 +395,7 @@ class CoroBeginOpConversion : public AsyncOpConversionPattern { // Allocate memory for the coroutine frame. auto allocFuncOp = LLVM::lookupOrCreateAlignedAllocFn( - op->getParentOfType(), rewriter.getI64Type(), - getTypeConverter()->useOpaquePointers()); + op->getParentOfType(), rewriter.getI64Type()); auto coroAlloc = rewriter.create( loc, allocFuncOp, ValueRange{coroAlign, coroSize}); @@ -499,8 +421,7 @@ class CoroFreeOpConversion : public AsyncOpConversionPattern { LogicalResult matchAndRewrite(CoroFreeOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - auto ptrType = AsyncAPI::opaquePointerType( - op->getContext(), getTypeConverter()->useOpaquePointers()); + auto ptrType = AsyncAPI::opaquePointerType(op->getContext()); auto loc = op->getLoc(); // Get a pointer to the coroutine frame memory: @llvm.coro.free. @@ -509,8 +430,7 @@ class CoroFreeOpConversion : public AsyncOpConversionPattern { // Free the memory. auto freeFuncOp = - LLVM::lookupOrCreateFreeFn(op->getParentOfType(), - getTypeConverter()->useOpaquePointers()); + LLVM::lookupOrCreateFreeFn(op->getParentOfType()); rewriter.replaceOpWithNewOp(op, freeFuncOp, ValueRange(coroMem.getResult())); @@ -538,8 +458,9 @@ class CoroEndOpConversion : public OpConversionPattern { // Mark the end of a coroutine: @llvm.coro.end. auto coroHdl = adaptor.getHandle(); - rewriter.create(op->getLoc(), rewriter.getI1Type(), - ValueRange({coroHdl, constFalse, noneToken})); + rewriter.create( + op->getLoc(), rewriter.getI1Type(), + ValueRange({coroHdl, constFalse, noneToken})); rewriter.eraseOp(op); return success(); @@ -673,7 +594,8 @@ class RuntimeCreateOpLowering : public ConvertOpToLLVMPattern { auto i64 = rewriter.getI64Type(); auto storedType = converter->convertType(valueType.getValueType()); - auto storagePtrType = getTypeConverter()->getPointerType(storedType); + auto storagePtrType = + AsyncAPI::opaquePointerType(rewriter.getContext()); // %Size = getelementptr %T* null, int 1 // %SizeI = ptrtoint %T* %Size to i64 @@ -846,12 +768,10 @@ class RuntimeAwaitAndResumeOpLowering Value handle = adaptor.getHandle(); // A pointer to coroutine resume intrinsic wrapper. - addResumeFunction(op->getParentOfType(), - getTypeConverter()->useOpaquePointers()); - auto resumeFnTy = AsyncAPI::resumeFunctionType( - op->getContext(), getTypeConverter()->useOpaquePointers()); + addResumeFunction(op->getParentOfType()); auto resumePtr = rewriter.create( - op->getLoc(), getTypeConverter()->getPointerType(resumeFnTy), kResume); + op->getLoc(), AsyncAPI::opaquePointerType(rewriter.getContext()), + kResume); rewriter.create( op->getLoc(), apiFuncName, TypeRange(), @@ -877,12 +797,10 @@ class RuntimeResumeOpLowering matchAndRewrite(RuntimeResumeOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { // A pointer to coroutine resume intrinsic wrapper. 
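    // (i.e. the address of @__resume, handed to the runtime so a managed
    // worker thread can re-enter the suspended coroutine.)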
- addResumeFunction(op->getParentOfType(), - getTypeConverter()->useOpaquePointers()); - auto resumeFnTy = AsyncAPI::resumeFunctionType( - op->getContext(), getTypeConverter()->useOpaquePointers()); + addResumeFunction(op->getParentOfType()); auto resumePtr = rewriter.create( - op->getLoc(), getTypeConverter()->getPointerType(resumeFnTy), kResume); + op->getLoc(), AsyncAPI::opaquePointerType(rewriter.getContext()), + kResume); // Call async runtime API to execute a coroutine in the managed thread. auto coroHdl = adaptor.getHandle(); @@ -909,8 +827,7 @@ class RuntimeStoreOpLowering : public ConvertOpToLLVMPattern { Location loc = op->getLoc(); // Get a pointer to the async value storage from the runtime. - auto ptrType = AsyncAPI::opaquePointerType( - rewriter.getContext(), getTypeConverter()->useOpaquePointers()); + auto ptrType = AsyncAPI::opaquePointerType(rewriter.getContext()); auto storage = adaptor.getStorage(); auto storagePtr = rewriter.create( loc, kGetValueStorage, TypeRange(ptrType), storage); @@ -923,11 +840,6 @@ class RuntimeStoreOpLowering : public ConvertOpToLLVMPattern { op, "failed to convert stored value type to LLVM type"); Value castedStoragePtr = storagePtr.getResult(0); - if (!getTypeConverter()->useOpaquePointers()) - castedStoragePtr = rewriter.create( - loc, getTypeConverter()->getPointerType(llvmValueType), - castedStoragePtr); - // Store the yielded value into the async value storage. auto value = adaptor.getValue(); rewriter.create(loc, value, castedStoragePtr); @@ -955,8 +867,7 @@ class RuntimeLoadOpLowering : public ConvertOpToLLVMPattern { Location loc = op->getLoc(); // Get a pointer to the async value storage from the runtime. - auto ptrType = AsyncAPI::opaquePointerType( - rewriter.getContext(), getTypeConverter()->useOpaquePointers()); + auto ptrType = AsyncAPI::opaquePointerType(rewriter.getContext()); auto storage = adaptor.getStorage(); auto storagePtr = rewriter.create( loc, kGetValueStorage, TypeRange(ptrType), storage); @@ -969,10 +880,6 @@ class RuntimeLoadOpLowering : public ConvertOpToLLVMPattern { op, "failed to convert loaded value type to LLVM type"); Value castedStoragePtr = storagePtr.getResult(0); - if (!getTypeConverter()->useOpaquePointers()) - castedStoragePtr = rewriter.create( - loc, getTypeConverter()->getPointerType(llvmValueType), - castedStoragePtr); // Load from the casted pointer. rewriter.replaceOpWithNewOp(op, llvmValueType, @@ -1115,12 +1022,11 @@ void ConvertAsyncToLLVMPass::runOnOperation() { MLIRContext *ctx = module->getContext(); LowerToLLVMOptions options(ctx); - options.useOpaquePointers = useOpaquePointers; // Add declarations for most functions required by the coroutines lowering. // We delay adding the resume function until it's needed because it currently // fails to compile unless '-O0' is specified. - addAsyncRuntimeApiDeclarations(module, useOpaquePointers); + addAsyncRuntimeApiDeclarations(module); // Lower async.runtime and async.coro operations to Async Runtime API and // LLVM coroutine intrinsics. @@ -1133,8 +1039,7 @@ void ConvertAsyncToLLVMPass::runOnOperation() { // operations. LLVMTypeConverter llvmConverter(ctx, options); llvmConverter.addConversion([&](Type type) { - return AsyncRuntimeTypeConverter::convertAsyncTypes( - type, llvmConverter.useOpaquePointers()); + return AsyncRuntimeTypeConverter::convertAsyncTypes(type); }); // Convert async types in function signatures and function calls. 
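// A net effect visible in the tests below: the storage pointer returned by
// the runtime no longer needs a bitcast before use. Sketch of the lowered
// async.runtime.store for an f32 payload (value names illustrative):
//   %p = call @mlirAsyncRuntimeGetValueStorage(%v) : (!llvm.ptr) -> !llvm.ptr
//   llvm.store %cst, %p : f32, !llvm.ptr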
diff --git a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir index 8a611cf96f5b5f..a398bc5710a865 100644 --- a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir +++ b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-async-to-llvm='use-opaque-pointers=1' | FileCheck %s +// RUN: mlir-opt %s -convert-async-to-llvm | FileCheck %s // CHECK-LABEL: @coro_id func.func @coro_id() { diff --git a/mlir/test/Conversion/AsyncToLLVM/convert-runtime-to-llvm.mlir b/mlir/test/Conversion/AsyncToLLVM/convert-runtime-to-llvm.mlir index 3672be91bbc07a..4077edc7420dca 100644 --- a/mlir/test/Conversion/AsyncToLLVM/convert-runtime-to-llvm.mlir +++ b/mlir/test/Conversion/AsyncToLLVM/convert-runtime-to-llvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-async-to-llvm='use-opaque-pointers=1' | FileCheck %s --dump-input=always +// RUN: mlir-opt %s -convert-async-to-llvm | FileCheck %s --dump-input=always // CHECK-LABEL: @create_token func.func @create_token() { diff --git a/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir b/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir index fd419dc95e7a1a..dd54bdb7987244 100644 --- a/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir +++ b/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -async-to-async-runtime -convert-async-to-llvm='use-opaque-pointers=1' | FileCheck %s +// RUN: mlir-opt %s -split-input-file -async-to-async-runtime -convert-async-to-llvm | FileCheck %s // CHECK-LABEL: reference_counting func.func @reference_counting(%arg0: !async.token) { diff --git a/mlir/test/Conversion/AsyncToLLVM/typed-pointers.mlir b/mlir/test/Conversion/AsyncToLLVM/typed-pointers.mlir deleted file mode 100644 index 07cd2add3b1512..00000000000000 --- a/mlir/test/Conversion/AsyncToLLVM/typed-pointers.mlir +++ /dev/null @@ -1,138 +0,0 @@ -// RUN: mlir-opt %s -split-input-file -async-to-async-runtime -convert-async-to-llvm='use-opaque-pointers=0' | FileCheck %s - - - -// CHECK-LABEL: @store -func.func @store() { - // CHECK: %[[CST:.*]] = arith.constant 1.0 - %0 = arith.constant 1.0 : f32 - // CHECK: %[[VALUE:.*]] = call @mlirAsyncRuntimeCreateValue - %1 = async.runtime.create : !async.value - // CHECK: %[[P0:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[VALUE]]) - // CHECK: %[[P1:.*]] = llvm.bitcast %[[P0]] : !llvm.ptr to !llvm.ptr - // CHECK: llvm.store %[[CST]], %[[P1]] - async.runtime.store %0, %1 : !async.value - return -} - -// CHECK-LABEL: @load -func.func @load() -> f32 { - // CHECK: %[[VALUE:.*]] = call @mlirAsyncRuntimeCreateValue - %0 = async.runtime.create : !async.value - // CHECK: %[[P0:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[VALUE]]) - // CHECK: %[[P1:.*]] = llvm.bitcast %[[P0]] : !llvm.ptr to !llvm.ptr - // CHECK: %[[VALUE:.*]] = llvm.load %[[P1]] - %1 = async.runtime.load %0 : !async.value - // CHECK: return %[[VALUE]] : f32 - return %1 : f32 -} - -// ----- - -// CHECK-LABEL: execute_no_async_args -func.func @execute_no_async_args(%arg0: f32, %arg1: memref<1xf32>) { - // CHECK: %[[TOKEN:.*]] = call @async_execute_fn(%arg0, %arg1) - %token = async.execute { - %c0 = arith.constant 0 : index - memref.store %arg0, %arg1[%c0] : memref<1xf32> - async.yield - } - // CHECK: call @mlirAsyncRuntimeAwaitToken(%[[TOKEN]]) - // CHECK: %[[IS_ERROR:.*]] = call @mlirAsyncRuntimeIsTokenError(%[[TOKEN]]) - // CHECK: %[[TRUE:.*]] = arith.constant true - // CHECK: 
%[[NOT_ERROR:.*]] = arith.xori %[[IS_ERROR]], %[[TRUE]] : i1 - // CHECK: cf.assert %[[NOT_ERROR]] - // CHECK-NEXT: return - async.await %token : !async.token - return -} - -// Function outlined from the async.execute operation. -// CHECK-LABEL: func private @async_execute_fn(%arg0: f32, %arg1: memref<1xf32>) -// CHECK-SAME: -> !llvm.ptr - -// Create token for return op, and mark a function as a coroutine. -// CHECK: %[[RET:.*]] = call @mlirAsyncRuntimeCreateToken() -// CHECK: %[[HDL:.*]] = llvm.intr.coro.begin - -// Pass a suspended coroutine to the async runtime. -// CHECK: %[[STATE:.*]] = llvm.intr.coro.save -// CHECK: %[[RESUME:.*]] = llvm.mlir.addressof @__resume -// CHECK: call @mlirAsyncRuntimeExecute(%[[HDL]], %[[RESUME]]) -// CHECK: %[[SUSPENDED:.*]] = llvm.intr.coro.suspend %[[STATE]] - -// Decide the next block based on the code returned from suspend. -// CHECK: %[[SEXT:.*]] = llvm.sext %[[SUSPENDED]] : i8 to i32 -// CHECK: llvm.switch %[[SEXT]] : i32, ^[[SUSPEND:[b0-9]+]] -// CHECK-NEXT: 0: ^[[RESUME:[b0-9]+]] -// CHECK-NEXT: 1: ^[[CLEANUP:[b0-9]+]] - -// Resume coroutine after suspension. -// CHECK: ^[[RESUME]]: -// CHECK: memref.store %arg0, %arg1[%c0] : memref<1xf32> -// CHECK: call @mlirAsyncRuntimeEmplaceToken(%[[RET]]) - -// Delete coroutine. -// CHECK: ^[[CLEANUP]]: -// CHECK: %[[MEM:.*]] = llvm.intr.coro.free -// CHECK: llvm.call @free(%[[MEM]]) - -// Suspend coroutine, and also a return statement for ramp function. -// CHECK: ^[[SUSPEND]]: -// CHECK: llvm.intr.coro.end -// CHECK: return %[[RET]] - -// ----- - -// CHECK-LABEL: execute_and_return_f32 -func.func @execute_and_return_f32() -> f32 { - // CHECK: %[[RET:.*]]:2 = call @async_execute_fn - %token, %result = async.execute -> !async.value { - %c0 = arith.constant 123.0 : f32 - async.yield %c0 : f32 - } - - // CHECK: %[[STORAGE:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[RET]]#1) - // CHECK: %[[ST_F32:.*]] = llvm.bitcast %[[STORAGE]] - // CHECK: %[[LOADED:.*]] = llvm.load %[[ST_F32]] : !llvm.ptr - %0 = async.await %result : !async.value - - return %0 : f32 -} - -// Function outlined from the async.execute operation. -// CHECK-LABEL: func private @async_execute_fn() -// CHECK: %[[TOKEN:.*]] = call @mlirAsyncRuntimeCreateToken() -// CHECK: %[[VALUE:.*]] = call @mlirAsyncRuntimeCreateValue -// CHECK: %[[HDL:.*]] = llvm.intr.coro.begin - -// Suspend coroutine in the beginning. -// CHECK: call @mlirAsyncRuntimeExecute(%[[HDL]], -// CHECK: llvm.intr.coro.suspend - -// Emplace result value. -// CHECK: %[[CST:.*]] = arith.constant 1.230000e+02 : f32 -// CHECK: %[[STORAGE:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[VALUE]]) -// CHECK: %[[ST_F32:.*]] = llvm.bitcast %[[STORAGE]] -// CHECK: llvm.store %[[CST]], %[[ST_F32]] : !llvm.ptr -// CHECK: call @mlirAsyncRuntimeEmplaceValue(%[[VALUE]]) - -// Emplace result token. 
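// (emplaced after the value above, so an awaiting consumer observes the
// token only once the result storage is populated.)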
-// CHECK: call @mlirAsyncRuntimeEmplaceToken(%[[TOKEN]]) - -// ----- - -// CHECK-LABEL: @await_and_resume_group -func.func @await_and_resume_group() { - %c = arith.constant 1 : index - %0 = async.coro.id - // CHECK: %[[HDL:.*]] = llvm.intr.coro.begin - %1 = async.coro.begin %0 - // CHECK: %[[TOKEN:.*]] = call @mlirAsyncRuntimeCreateGroup - %2 = async.runtime.create_group %c : !async.group - // CHECK: %[[RESUME:.*]] = llvm.mlir.addressof @__resume - // CHECK: call @mlirAsyncRuntimeAwaitAllInGroupAndExecute - // CHECK-SAME: (%[[TOKEN]], %[[HDL]], %[[RESUME]]) - async.runtime.await_and_resume %2, %1 : !async.group - return -} From 4fed3d374dfca82d0cb32bb444985ece04438376 Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Tue, 31 Oct 2023 07:36:46 +0100 Subject: [PATCH 119/144] [MLIR][ControlFlowToLLVM] Remove typed pointer support (#70733) This commit removes the support for lowering ControlFlow to LLVM dialect with typed pointers. Typed pointers have been deprecated for a while now and it's planned to soon remove them from the LLVM dialect. Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502 --- mlir/include/mlir/Conversion/Passes.td | 5 +---- mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp | 1 - mlir/test/Conversion/ControlFlowToLLVM/assert.mlir | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 5423be0e91d0ac..ba7dc642af2a07 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -293,10 +293,7 @@ def ConvertControlFlowToLLVMPass : Pass<"convert-cf-to-llvm", "ModuleOp"> { let options = [ Option<"indexBitwidth", "index-bitwidth", "unsigned", /*default=kDeriveIndexBitwidthFromDataLayout*/"0", - "Bitwidth of the index type, 0 to use size of machine word">, - Option<"useOpaquePointers", "use-opaque-pointers", "bool", - /*default=*/"true", "Generate LLVM IR using opaque pointers " - "instead of typed pointers">, + "Bitwidth of the index type, 0 to use size of machine word"> ]; } diff --git a/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp b/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp index 433d8a01a1ac8a..b8e5aec25286d2 100644 --- a/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp +++ b/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp @@ -236,7 +236,6 @@ struct ConvertControlFlowToLLVM LowerToLLVMOptions options(&getContext()); if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout) options.overrideIndexBitwidth(indexBitwidth); - options.useOpaquePointers = useOpaquePointers; LLVMTypeConverter converter(&getContext(), options); mlir::cf::populateControlFlowToLLVMConversionPatterns(converter, patterns); diff --git a/mlir/test/Conversion/ControlFlowToLLVM/assert.mlir b/mlir/test/Conversion/ControlFlowToLLVM/assert.mlir index dc5ba0680acb2e..3ec8f1fa1e5678 100644 --- a/mlir/test/Conversion/ControlFlowToLLVM/assert.mlir +++ b/mlir/test/Conversion/ControlFlowToLLVM/assert.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-cf-to-llvm='use-opaque-pointers=1' | FileCheck %s +// RUN: mlir-opt %s -convert-cf-to-llvm | FileCheck %s // Same below, but using the `ConvertToLLVMPatternInterface` entry point // and the generic `convert-to-llvm` pass. 
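// For reference, a rough sketch of what cf.assert lowers to under this
// pass (block names illustrative):
//   llvm.cond_br %cond, ^continue, ^failure
// ^failure:  // prints the assert message, then aborts
//   llvm.call @abort() : () -> ()
//   llvm.unreachable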
From 8d4e35600f3ba90997a59fdb9baeb196e723eec9 Mon Sep 17 00:00:00 2001 From: licongtian Date: Wed, 20 Sep 2023 11:21:56 +0800 Subject: [PATCH 120/144] [Clang][LoongArch] Support compiler options -mlsx/-mlasx for clang This patch adds compiler options -mlsx/-mlasx which enables the instruction sets of LSX and LASX, and sets related predefined macros according to the options. --- .../clang/Basic/DiagnosticDriverKinds.td | 6 +++ clang/include/clang/Driver/Options.td | 10 +++++ clang/lib/Basic/Targets/LoongArch.cpp | 12 +++++- clang/lib/Basic/Targets/LoongArch.h | 4 ++ .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 32 +++++++++++++++ clang/test/Driver/loongarch-mlasx-error.c | 15 +++++++ clang/test/Driver/loongarch-mlasx.c | 37 +++++++++++++++++ clang/test/Driver/loongarch-mlsx-error.c | 12 ++++++ clang/test/Driver/loongarch-mlsx.c | 41 +++++++++++++++++++ clang/test/Preprocessor/init-loongarch.c | 35 ++++++++++++++++ 10 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 clang/test/Driver/loongarch-mlasx-error.c create mode 100644 clang/test/Driver/loongarch-mlasx.c create mode 100644 clang/test/Driver/loongarch-mlsx-error.c create mode 100644 clang/test/Driver/loongarch-mlsx.c diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index c0ccd64a2a7b82..676f1a62b49dd0 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -765,6 +765,12 @@ def warn_drv_loongarch_conflicting_implied_val : Warning< InGroup; def err_drv_loongarch_invalid_mfpu_EQ : Error< "invalid argument '%0' to -mfpu=; must be one of: 64, 32, none, 0 (alias for none)">; +def err_drv_loongarch_wrong_fpu_width_for_lsx : Error< + "wrong fpu width; LSX depends on 64-bit FPU.">; +def err_drv_loongarch_wrong_fpu_width_for_lasx : Error< + "wrong fpu width; LASX depends on 64-bit FPU.">; +def err_drv_loongarch_invalid_simd_option_combination : Error< + "invalid option combination; LASX depends on LSX.">; def err_drv_expand_response_file : Error< "failed to expand response file: %0">; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 7f3f5125d42e7a..c8b730e0f7ecd8 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -208,6 +208,8 @@ def m_riscv_Features_Group : OptionGroup<"">, Group, DocName<"RISC-V">; def m_ve_Features_Group : OptionGroup<"">, Group, DocName<"VE">; +def m_loongarch_Features_Group : OptionGroup<"">, + Group, DocName<"LoongArch">; def m_libc_Group : OptionGroup<"">, Group, Flags<[HelpHidden]>; @@ -4886,6 +4888,14 @@ def mstack_protector_guard_reg_EQ : Joined<["-"], "mstack-protector-guard-reg="> def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86/SystemZ only)">, Visibility<[ClangOption, CC1Option]>, Group, MarshallingInfoFlag>; +def mlsx : Flag<["-"], "mlsx">, Group, + HelpText<"Enable Loongson SIMD Extension (LSX).">; +def mno_lsx : Flag<["-"], "mno-lsx">, Group, + HelpText<"Disable Loongson SIMD Extension (LSX).">; +def mlasx : Flag<["-"], "mlasx">, Group, + HelpText<"Enable Loongson Advanced SIMD Extension (LASX).">; +def mno_lasx : Flag<["-"], "mno-lasx">, Group, + HelpText<"Disable Loongson Advanced SIMD Extension (LASX).">; def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. 
To activate they need to be patched in.">, Visibility<[ClangOption, CC1Option]>, Group, MarshallingInfoFlag>; diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp index 4448a2ae10a172..88537989a05129 100644 --- a/clang/lib/Basic/Targets/LoongArch.cpp +++ b/clang/lib/Basic/Targets/LoongArch.cpp @@ -208,6 +208,11 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, TuneCPU = ArchName; Builder.defineMacro("__loongarch_tune", Twine('"') + TuneCPU + Twine('"')); + if (HasFeatureLSX) + Builder.defineMacro("__loongarch_sx", Twine(1)); + if (HasFeatureLASX) + Builder.defineMacro("__loongarch_asx", Twine(1)); + StringRef ABI = getABI(); if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") Builder.defineMacro("__loongarch_lp64"); @@ -257,6 +262,8 @@ bool LoongArchTargetInfo::hasFeature(StringRef Feature) const { .Case("loongarch64", Is64Bit) .Case("32bit", !Is64Bit) .Case("64bit", Is64Bit) + .Case("lsx", HasFeatureLSX) + .Case("lasx", HasFeatureLASX) .Default(false); } @@ -274,7 +281,10 @@ bool LoongArchTargetInfo::handleTargetFeatures( if (Feature == "+d") { HasFeatureD = true; } - } + } else if (Feature == "+lsx") + HasFeatureLSX = true; + else if (Feature == "+lasx") + HasFeatureLASX = true; } return true; } diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index ba7fb78ab94cd2..3313102492cb8d 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -27,12 +27,16 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { std::string CPU; bool HasFeatureD; bool HasFeatureF; + bool HasFeatureLSX; + bool HasFeatureLASX; public: LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) : TargetInfo(Triple) { HasFeatureD = false; HasFeatureF = false; + HasFeatureLSX = false; + HasFeatureLASX = false; LongDoubleWidth = 128; LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp index 65925e9ed61010..31153a67ad2840 100644 --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -175,6 +175,38 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, A->ignoreTargetSpecific(); if (Arg *A = Args.getLastArgNoClaim(options::OPT_mfpu_EQ)) A->ignoreTargetSpecific(); + + // Select lsx feature determined by -m[no-]lsx. + if (const Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { + // LSX depends on 64-bit FPU. + // -m*-float and -mfpu=none/0/32 conflict with -mlsx. + if (A->getOption().matches(options::OPT_mlsx)) { + if (llvm::find(Features, "-d") != Features.end()) + D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx); + else /*-mlsx*/ + Features.push_back("+lsx"); + } else /*-mno-lsx*/ { + Features.push_back("-lsx"); + } + } + + // Select lasx feature determined by -m[no-]lasx. + if (const Arg *A = + Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { + // LASX depends on 64-bit FPU and LSX. + // -mno-lsx conflicts with -mlasx. 
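      // Summarizing the checks below: -mlasx requires both a 64-bit FPU
      // ("+d") and LSX, so accepting it implicitly pushes "+lsx"; an
      // explicit -mno-lsx (or a narrow FPU) turns -mlasx into an error.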
+ if (A->getOption().matches(options::OPT_mlasx)) { + if (llvm::find(Features, "-d") != Features.end()) + D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx); + else if (llvm::find(Features, "-lsx") != Features.end()) + D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); + else { /*-mlasx*/ + Features.push_back("+lsx"); + Features.push_back("+lasx"); + } + } else /*-mno-lasx*/ + Features.push_back("-lasx"); + } } std::string loongarch::postProcessTargetCPUString(const std::string &CPU, diff --git a/clang/test/Driver/loongarch-mlasx-error.c b/clang/test/Driver/loongarch-mlasx-error.c new file mode 100644 index 00000000000000..e66f277f7c292f --- /dev/null +++ b/clang/test/Driver/loongarch-mlasx-error.c @@ -0,0 +1,15 @@ +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msingle-float 2>&1 \ +// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msoft-float 2>&1 \ +// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=32 2>&1 \ +// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=0 2>&1 \ +// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=none 2>&1 \ +// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mno-lsx 2>&1 \ +// RUN: FileCheck --check-prefix=ERROR_LASX_FPU128 %s + +// ERROR_LASX_FPU64: error: wrong fpu width; LASX depends on 64-bit FPU. +// ERROR_LASX_FPU128: error: invalid option combination; LASX depends on LSX. diff --git a/clang/test/Driver/loongarch-mlasx.c b/clang/test/Driver/loongarch-mlasx.c new file mode 100644 index 00000000000000..0b934f125c9e46 --- /dev/null +++ b/clang/test/Driver/loongarch-mlasx.c @@ -0,0 +1,37 @@ +/// Test -m[no-]lasx options. 
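+///
+/// A rough summary of the checks below: -mlasx implies both "+lsx" and
+/// "+lasx" at the cc1 level, and the last of -mlasx/-mno-lasx on the
+/// command line wins. LASX-only paths can be guarded on the macro this
+/// patch defines (a usage sketch, not part of the test):
+///   #ifdef __loongarch_asx
+///   /* LASX-specific code */
+///   #endif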
+ +// RUN: %clang --target=loongarch64 -mlasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LASX +// RUN: %clang --target=loongarch64 -mno-lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LASX +// RUN: %clang --target=loongarch64 -mlsx -mlasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LASX +// RUN: %clang --target=loongarch64 -mlasx -mlsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LASX + +// RUN: %clang --target=loongarch64 -mlasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LASX +// RUN: %clang --target=loongarch64 -mno-lasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LASX +// RUN: %clang --target=loongarch64 -mlsx -mlasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LASX +// RUN: %clang --target=loongarch64 -mlasx -mlsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LASX + +// CC1-LASX: "-target-feature" "+lsx" "-target-feature" "+lasx" +// CC1-NOLASX: "-target-feature" "-lasx" + +// IR-LASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lasx{{(,.*)?}}" +// IR-NOLASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lasx{{(,.*)?}}" + +int foo(void){ + return 3; +} diff --git a/clang/test/Driver/loongarch-mlsx-error.c b/clang/test/Driver/loongarch-mlsx-error.c new file mode 100644 index 00000000000000..bd6b8e2718bf60 --- /dev/null +++ b/clang/test/Driver/loongarch-mlsx-error.c @@ -0,0 +1,12 @@ +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msingle-float 2>&1 \ +// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msoft-float 2>&1 \ +// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=32 2>&1 \ +// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=0 2>&1 \ +// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=none 2>&1 \ +// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s + +// ERROR_LSX_FPU64: error: wrong fpu width; LSX depends on 64-bit FPU. diff --git a/clang/test/Driver/loongarch-mlsx.c b/clang/test/Driver/loongarch-mlsx.c new file mode 100644 index 00000000000000..7d4307b078e1a0 --- /dev/null +++ b/clang/test/Driver/loongarch-mlsx.c @@ -0,0 +1,41 @@ +/// Test -m[no-]lsx options. 
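+///
+/// A rough summary of the checks below: the last of -mlsx/-mno-lsx wins,
+/// and -mno-lasx alone does not disable LSX. With -mlsx in effect, LSX-only
+/// paths can be guarded on the macro this patch defines (a usage sketch,
+/// not part of the test):
+///   #ifdef __loongarch_sx
+///   /* LSX-specific code */
+///   #endif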
+ +// RUN: %clang --target=loongarch64 -mlsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LSX +// RUN: %clang --target=loongarch64 -mno-lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NOLSX +// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NOLSX +// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LSX +// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LSX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LSX +// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NOLSX + +// RUN: %clang --target=loongarch64 -mlsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LSX +// RUN: %clang --target=loongarch64 -mno-lsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NOLSX +// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NOLSX +// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LSX +// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LSX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LSX +// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NOLSX + +// CC1-LSX: "-target-feature" "+lsx" +// CC1-NOLSX: "-target-feature" "-lsx" + +// IR-LSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lsx{{(,.*)?}}" +// IR-NOLSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lsx{{(,.*)?}}" + +int foo(void){ + return 3; +} diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c index 4ef42a921ec033..e235a728302153 100644 --- a/clang/test/Preprocessor/init-loongarch.c +++ b/clang/test/Preprocessor/init-loongarch.c @@ -807,3 +807,38 @@ // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" // ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" + +// RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +// MLSX-NOT: #define __loongarch_asx +// MLSX: #define __loongarch_sx 1 + +// RUN: %clang --target=loongarch64 -mlasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +// RUN: %clang --target=loongarch64 -mlsx -mlasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +// RUN: %clang --target=loongarch64 -mlasx -mlsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +// 
MLASX: #define __loongarch_asx 1 +// MLASX: #define __loongarch_sx 1 + +// RUN: %clang --target=loongarch64 -mno-lsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// MNO-LSX-NOT: #define __loongarch_asx +// MNO-LSX-NOT: #define __loongarch_sx From eb49b86f5a9b54b0e3c37024334a3c6f6ca88e14 Mon Sep 17 00:00:00 2001 From: licongtian Date: Wed, 25 Oct 2023 17:35:32 +0800 Subject: [PATCH 121/144] [Clang][LoongArch] Add ABI implementation of passing vectors --- clang/lib/CodeGen/Targets/LoongArch.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp index 7483bf6d6d1e8e..26c68c3583b2a1 100644 --- a/clang/lib/CodeGen/Targets/LoongArch.cpp +++ b/clang/lib/CodeGen/Targets/LoongArch.cpp @@ -321,6 +321,13 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, return ABIArgInfo::getDirect(); } + // Pass 128-bit/256-bit vector values via vector registers directly. + if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && + (getTarget().hasFeature("lsx"))) || + ((getContext().getTypeSize(Ty) == 256) && + getTarget().hasFeature("lasx")))) + return ABIArgInfo::getDirect(); + // Complex types for the *f or *d ABI must be passed directly rather than // using CoerceAndExpand. if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { From d6bfa3341181a80de6c8aede807fc1acc3ce8d9b Mon Sep 17 00:00:00 2001 From: licongtian Date: Wed, 25 Oct 2023 17:41:03 +0800 Subject: [PATCH 122/144] [Clang][LoongArch] Support the builtin functions for LSX This patch does the following work: - Define the builtin functions for LSX - Add the header file lsxintrin.h - Add the immediate number range checking for LSX builtins --- .../include/clang/Basic/BuiltinsLoongArch.def | 43 +- .../clang/Basic/BuiltinsLoongArchBase.def | 53 + .../clang/Basic/BuiltinsLoongArchLSX.def | 953 +++++ clang/lib/Headers/CMakeLists.txt | 1 + clang/lib/Headers/lsxintrin.h | 3726 +++++++++++++++++ clang/lib/Sema/SemaChecking.cpp | 229 +- 6 files changed, 4965 insertions(+), 40 deletions(-) create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchBase.def create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchLSX.def create mode 100644 clang/lib/Headers/lsxintrin.h diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def index 20510e18fe58c1..9ec19c31095aff 100644 --- a/clang/include/clang/Basic/BuiltinsLoongArch.def +++ b/clang/include/clang/Basic/BuiltinsLoongArch.def @@ -15,46 +15,11 @@ # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif -// TODO: Support more builtins. 
-TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") -TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") -TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") -TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") +// Definition of LoongArch basic builtins. +#include "clang/Basic/BuiltinsLoongArchBase.def" -TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") - -TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") - -TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") - -TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") +// Definition of LSX builtins. +#include "clang/Basic/BuiltinsLoongArchLSX.def" #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/BuiltinsLoongArchBase.def b/clang/include/clang/Basic/BuiltinsLoongArchBase.def new file mode 100644 index 00000000000000..cbb239223aae3b --- /dev/null +++ b/clang/include/clang/Basic/BuiltinsLoongArchBase.def @@ -0,0 +1,53 @@ +//============------------ BuiltinsLoongArchBase.def -------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoongArch-specific basic builtin function database. +// Users of this file must define the BUILTIN macro to make use of this +// information. 
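+// Prototype strings use the usual clang Builtins.def encoding; in
+// "vWiUWiWi", for instance, 'v' is a void return, 'Wi' a 64-bit integer,
+// 'UWi' its unsigned form, and a leading 'I' marks an argument that must
+// be a compile-time constant. The final TARGET_BUILTIN field names the
+// target feature the builtin requires (e.g. "64bit", "f").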
+// +//===----------------------------------------------------------------------===// + +TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") +TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") +TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") +TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def new file mode 100644 index 00000000000000..8e6aec886c50cd --- /dev/null +++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def @@ -0,0 +1,953 @@ +//=============------------- BuiltinsLoongArchLSX.def --------------- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoongArch-specific LSX builtin function database. +// Users of this file must define the BUILTIN macro to make use of this +// information. 
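+// Vector prototypes follow the same encoding; "V16ScV16ScIUi", for
+// instance, returns a vector of 16 signed chars and takes such a vector
+// plus an immediate unsigned int, while "V2LLi" is a vector of 2 long
+// longs. Every builtin in this file requires the "lsx" target feature.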
+// +//===----------------------------------------------------------------------===// + +TARGET_BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_q_d, 
"V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_w, "V4SiV4SiIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", 
"nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + + +TARGET_BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsat_b, "V16ScV16ScIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", 
"nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc", "lsx") + + +TARGET_BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc", "lsx") + 
+TARGET_BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + 
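+// Informal naming sketch for this family: an extra leading 's' means
+// saturating, 'srl'/'sra' is shift right logical/arithmetic, a following
+// 'r' adds rounding, 'n' narrows, and a trailing 'i' takes the shift
+// amount as an immediate; type suffixes pair destination with source
+// elements (e.g. _bu_h: unsigned bytes from halfwords).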
+TARGET_BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitclri_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + 
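+// The 'i'-suffixed forms below take the bit position as an immediate
+// operand (IUi) rather than as a second vector operand.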
+TARGET_BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfsub_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", 
"nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcvt_s_d, "V4fV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrintrne_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrintrne_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrintrz_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrintrz_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrintrp_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrintrp_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrintrm_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrintrm_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc", "lsx") 
+ +TARGET_BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc", "lsx") diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 02a0c81644b6c6..cee61a7e4fb088 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -93,6 +93,7 @@ set(hlsl_files set(loongarch_files larchintrin.h + lsxintrin.h ) set(mips_msa_files diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h new file mode 100644 index 00000000000000..a29bc7757ab568 --- /dev/null +++ b/clang/lib/Headers/lsxintrin.h @@ -0,0 +1,3726 @@ +/*===------------- lsxintrin.h - LoongArch LSX intrinsics ------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _LOONGSON_SXINTRIN_H +#define _LOONGSON_SXINTRIN_H 1 + +#if defined(__loongarch_sx) +typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); +typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); +typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); +typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); +typedef short v8i16 __attribute__((vector_size(16), aligned(16))); +typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); +typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); +typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); +typedef int v4i32 __attribute__((vector_size(16), aligned(16))); +typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); +typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); +typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); +typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); +typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); +typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); +typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); +typedef float v4f32 __attribute__((vector_size(16), aligned(16))); +typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); +typedef double v2f64 __attribute__((vector_size(16), aligned(16))); +typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); + +typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); +typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); +typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) + +#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) + +#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) + +#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_h(__m128i _1, __m128i _2) { + return 
(__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) + +#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) + +#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) + +#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) + +#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) + +#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) + +#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) + +#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) + +#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) + +#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_b(__m128i _1, __m128i _2) { + return 
(__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) + +#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) + +#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) + +#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) + +#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) + +#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) + +#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) + +#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) + +#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ + 
((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) + +#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) + +#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) + +#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) + +#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) + +#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) + +#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) + +#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); +} + +#define 
__lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) + +#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) + +#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) + +#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) + +#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) + +#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) + +#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) + +#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) + +#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) + +#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); +} + +extern 
__inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) + +#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) + +#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) + +#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) + +#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) + +#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) + +#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) + +#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) + +#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) + +#define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) + +#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + 
__lsx_vslt_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) + +#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) + +#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) + +#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) + +#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) + +#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) + +#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) + +#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) + +#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); +} + +extern 
__inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) + +#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) + +#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) + +#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) + +#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) + +#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) + +#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) + +#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) + +#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) + +#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) + +#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) + +#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); +} + +extern 
__inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m128i + __lsx_vavgr_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_wu(__m128i _1, __m128i _2) { + return 
(__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i + __lsx_vdiv_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + 
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplve_b(__m128i _1, int _2) {
+  return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplve_h(__m128i _1, int _2) {
+  return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplve_w(__m128i _1, int _2) {
+  return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplve_d(__m128i _1, int _2) {
+  return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2);
+}
+
+#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2)))
+
+#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2)))
+
+#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \
+  ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2)))
+
+#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \
+  ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickev_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickev_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickev_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickev_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickod_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickod_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickod_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickod_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvh_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvh_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvh_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvh_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvl_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvl_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvl_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvl_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackev_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackev_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackev_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackev_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackod_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackod_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackod_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackod_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vand_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vor_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vnor_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vxor_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3);
+}
+
+#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+  ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3)))
+
+#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2)))
+
+#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2)))
+
+#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplgr2vr_b(int _1) {
+  return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1);
+}
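An illustrative sketch (not from the patch): __lsx_vbitsel_v is assumed,
from the builtin's name, to take each result bit from the third operand's
source selection, i.e. bits of _2 where the mask _3 is 1 and bits of _1
where it is 0, which gives a bitwise blend; splat_b and blend_v are
hypothetical helpers.

    /* Blend: bits of b where mask bits are set, else bits of a. */
    static inline __m128i blend_v(__m128i a, __m128i b, __m128i mask) {
      return __lsx_vbitsel_v(a, b, mask);
    }

    /* Broadcast one byte to all 16 lanes, e.g. to build a mask. */
    static inline __m128i splat_b(int x) { return __lsx_vreplgr2vr_b(x); }
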
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplgr2vr_h(int _1) {
+  return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplgr2vr_w(int _1) {
+  return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplgr2vr_d(long int _1) {
+  return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpcnt_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpcnt_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpcnt_w(__m128i _1) {
+  return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpcnt_d(__m128i _1) {
+  return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclo_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclo_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclo_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclo_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclo_w(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclo_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclo_d(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclo_d((v2i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclz_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclz_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclz_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclz_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclz_w(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclz_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclz_d(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclz_d((v2i64)_1);
+}
+
+#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2)))
+
+#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2)))
+
+#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \
+  ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2)))
+
+#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \
+  ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2)))
+
+#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2)))
+
+#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2)))
+
+#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \
+  ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2)))
+
+#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \
+  ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2)))
+
+#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \
+  ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3)))
+
+#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \
+  ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3)))
+
+#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \
+  ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3)))
+
+#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \
+  ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfadd_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfadd_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfsub_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfsub_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmul_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmul_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfdiv_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfdiv_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfcvt_h_s(__m128 _1, __m128 _2) {
+  return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfcvt_s_d(__m128d _1, __m128d _2) {
+  return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmin_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmin_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmina_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmina_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmax_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmax_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmaxa_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmaxa_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfclass_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfclass_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfsqrt_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfsqrt_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrecip_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrecip_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrint_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrint_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrint_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrsqrt_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrsqrt_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vflogb_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vflogb_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vflogb_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfcvth_s_h(__m128i _1) {
+  return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfcvth_d_s(__m128 _1) {
+  return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfcvtl_s_h(__m128i _1) {
+  return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfcvtl_d_s(__m128 _1) {
+  return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftint_w_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftint_l_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftint_wu_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftint_lu_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrz_w_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrz_l_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrz_wu_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrz_lu_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vffint_s_w(__m128i _1) {
+  return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vffint_d_l(__m128i _1) {
+  return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vffint_s_wu(__m128i _1) {
+  return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vffint_d_lu(__m128i _1) {
+  return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vandn_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vneg_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vneg_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vneg_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vneg_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vneg_w(__m128i _1) {
+  return (__m128i)__builtin_lsx_vneg_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vneg_d(__m128i _1) {
+  return (__m128i)__builtin_lsx_vneg_d((v2i64)_1);
+}
+
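A sketch of the float/int round trip these wrappers enable (illustrative,
not from the patch): "rz" is assumed to mean round toward zero, matching C
truncation semantics; trunc_ps is a hypothetical helper.

    /* Truncate each float lane toward zero, then convert back to float. */
    static inline __m128 trunc_ps(__m128 x) {
      __m128i i = __lsx_vftintrz_w_s(x); /* float -> int32, round to zero */
      return __lsx_vffint_s_w(i);        /* int32 -> float */
    }
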
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsran_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsran_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsran_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_bu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_hu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_wu_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrarn_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrarn_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrarn_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrln_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrln_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrln_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrln_bu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrln_hu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrln_wu_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+  ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+  ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3);
+}
+
+#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+  ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2)))
+
+#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2)))
+
+#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+  ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+  ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+  ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+  ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmskltz_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmskltz_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmskltz_w(__m128i _1) {
+  return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmskltz_d(__m128i _1) {
+  return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsigncov_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsigncov_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsigncov_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsigncov_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) {
+  return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) {
+  return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) {
+  return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) {
+  return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) {
+  return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) {
+  return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) {
+  return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) {
+  return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrne_w_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrne_l_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1);
+}
+
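An illustrative fused multiply-add use (not from the patch): single-rounding
behavior is assumed from the fma-style naming; axpy is a hypothetical
helper.

    /* One step of y = a*x + y on four float lanes, fused. */
    static inline __m128 axpy(__m128 a, __m128 x, __m128 y) {
      return __lsx_vfmadd_s(a, x, y);
    }
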
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrp_w_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrp_l_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrm_w_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrm_l_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftint_w_d(__m128d _1, __m128d _2) {
+  return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vffint_s_l(__m128i _1, __m128i _2) {
+  return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrz_w_d(__m128d _1, __m128d _2) {
+  return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrp_w_d(__m128d _1, __m128d _2) {
+  return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrm_w_d(__m128d _1, __m128d _2) {
+  return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrne_w_d(__m128d _1, __m128d _2) {
+  return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintl_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftinth_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vffinth_d_w(__m128i _1) {
+  return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vffintl_d_w(__m128i _1) {
+  return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrzl_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrzh_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrpl_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrph_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrml_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrmh_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrnel_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrneh_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrintrne_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrintrne_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrintrz_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrintrz_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrintrp_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrintrp_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrintrm_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrintrm_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1);
+}
+
+#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \
+  ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4)))
+
+#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \
+  ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4)))
+
+#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \
+  ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4)))
+
+#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \
+  ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4)))
+
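A sketch of a single-lane store (illustrative, not from the patch): from
the macro comments above, the third argument is assumed to be a byte offset
added to the pointer and the fourth selects the lane; store_lane2 is a
hypothetical helper.

    /* Store only 32-bit lane 2 of v to dst; other memory is untouched. */
    static inline void store_lane2(int *dst, __m128i v) {
      __lsx_vstelm_w(v, dst, 0, 2);
    }
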
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_q_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_q_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_q_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_q_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2);
+}
+
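A sketch of a full 32x32->64 multiply built from the even/odd widening
multiplies (illustrative, not from the patch): lane placement is assumed
from the ev/od naming; mul_full_w and its out-parameters are hypothetical.

    /* prod_ev holds the products of lanes 0 and 2, prod_od of lanes 1 and
       3, each as a signed 64-bit result. */
    static inline void mul_full_w(__m128i a, __m128i b,
                                  __m128i *prod_ev, __m128i *prod_od) {
      *prod_ev = __lsx_vmulwev_d_w(a, b);
      *prod_od = __lsx_vmulwod_d_w(a, b);
    }
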
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_q_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_q_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhaddw_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhsubw_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2,
+                                                (v4i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2,
+                                                (v8i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2,
+                                                (v16i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2,
+                                                (v4i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2,
+                                                (v8i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2,
+                                                (v16i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2,
+                                                (v2i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2,
+                                                (v2i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vrotr_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vrotr_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vrotr_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vrotr_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vadd_q(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsub_q(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \
+  ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2)))
+
+#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \
+  ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2)))
+
+#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \
+  ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2)))
+
+#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \
+  ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2)))
+
+extern __inline
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskgez_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsknz_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_h_b(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_w_h(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_d_w(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_q_d(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_hu_bu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_wu_hu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_du_wu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_qu_du(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); +} + +#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) + +#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) + +#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) + +#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vextl_q_d(__m128i _1) { + return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); +} + +#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ 
_3) \ + ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + 
((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ + ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) + +#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + 
__lsx_vssrln_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vorn_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); +} + +#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vldx(void const *_1, long int _2) { + return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void + __lsx_vstx(__m128i _1, void *_2, long int _3) { + return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vextl_qu_du(__m128i _1) { + return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); +} + +#define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) + +#define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1))) + +#define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) + +#define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) + +#define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) + +#define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) + +#define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) + +#define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) + +#define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) + +#define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); +} + 
+extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { + return 
(__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); +} + +#define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) + +#define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) + +#define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1))) + +#define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1))) + +#endif /* defined(__loongarch_sx) */ +#endif /* _LOONGSON_SXINTRIN_H */ diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 7972919d142011..e131061eb88548 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3928,6 +3928,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, switch (BuiltinID) { default: break; + // Basic intrinsics. case LoongArch::BI__builtin_loongarch_cacop_d: case LoongArch::BI__builtin_loongarch_cacop_w: { SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); @@ -3956,8 +3957,234 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_loongarch_movfcsr2gr: case LoongArch::BI__builtin_loongarch_movgr2fcsr: return SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(2)); - } + // LSX intrinsics.
+ case LoongArch::BI__builtin_lsx_vbitclri_b: + case LoongArch::BI__builtin_lsx_vbitrevi_b: + case LoongArch::BI__builtin_lsx_vbitseti_b: + case LoongArch::BI__builtin_lsx_vsat_b: + case LoongArch::BI__builtin_lsx_vsat_bu: + case LoongArch::BI__builtin_lsx_vslli_b: + case LoongArch::BI__builtin_lsx_vsrai_b: + case LoongArch::BI__builtin_lsx_vsrari_b: + case LoongArch::BI__builtin_lsx_vsrli_b: + case LoongArch::BI__builtin_lsx_vsllwil_h_b: + case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: + case LoongArch::BI__builtin_lsx_vrotri_b: + case LoongArch::BI__builtin_lsx_vsrlri_b: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + case LoongArch::BI__builtin_lsx_vbitclri_h: + case LoongArch::BI__builtin_lsx_vbitrevi_h: + case LoongArch::BI__builtin_lsx_vbitseti_h: + case LoongArch::BI__builtin_lsx_vsat_h: + case LoongArch::BI__builtin_lsx_vsat_hu: + case LoongArch::BI__builtin_lsx_vslli_h: + case LoongArch::BI__builtin_lsx_vsrai_h: + case LoongArch::BI__builtin_lsx_vsrari_h: + case LoongArch::BI__builtin_lsx_vsrli_h: + case LoongArch::BI__builtin_lsx_vsllwil_w_h: + case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: + case LoongArch::BI__builtin_lsx_vrotri_h: + case LoongArch::BI__builtin_lsx_vsrlri_h: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + case LoongArch::BI__builtin_lsx_vssrarni_b_h: + case LoongArch::BI__builtin_lsx_vssrarni_bu_h: + case LoongArch::BI__builtin_lsx_vssrani_b_h: + case LoongArch::BI__builtin_lsx_vssrani_bu_h: + case LoongArch::BI__builtin_lsx_vsrarni_b_h: + case LoongArch::BI__builtin_lsx_vsrlni_b_h: + case LoongArch::BI__builtin_lsx_vsrlrni_b_h: + case LoongArch::BI__builtin_lsx_vssrlni_b_h: + case LoongArch::BI__builtin_lsx_vssrlni_bu_h: + case LoongArch::BI__builtin_lsx_vssrlrni_b_h: + case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: + case LoongArch::BI__builtin_lsx_vsrani_b_h: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + case LoongArch::BI__builtin_lsx_vslei_bu: + case LoongArch::BI__builtin_lsx_vslei_hu: + case LoongArch::BI__builtin_lsx_vslei_wu: + case LoongArch::BI__builtin_lsx_vslei_du: + case LoongArch::BI__builtin_lsx_vslti_bu: + case LoongArch::BI__builtin_lsx_vslti_hu: + case LoongArch::BI__builtin_lsx_vslti_wu: + case LoongArch::BI__builtin_lsx_vslti_du: + case LoongArch::BI__builtin_lsx_vmaxi_bu: + case LoongArch::BI__builtin_lsx_vmaxi_hu: + case LoongArch::BI__builtin_lsx_vmaxi_wu: + case LoongArch::BI__builtin_lsx_vmaxi_du: + case LoongArch::BI__builtin_lsx_vmini_bu: + case LoongArch::BI__builtin_lsx_vmini_hu: + case LoongArch::BI__builtin_lsx_vmini_wu: + case LoongArch::BI__builtin_lsx_vmini_du: + case LoongArch::BI__builtin_lsx_vaddi_bu: + case LoongArch::BI__builtin_lsx_vaddi_hu: + case LoongArch::BI__builtin_lsx_vaddi_wu: + case LoongArch::BI__builtin_lsx_vaddi_du: + case LoongArch::BI__builtin_lsx_vbitclri_w: + case LoongArch::BI__builtin_lsx_vbitrevi_w: + case LoongArch::BI__builtin_lsx_vbitseti_w: + case LoongArch::BI__builtin_lsx_vsat_w: + case LoongArch::BI__builtin_lsx_vsat_wu: + case LoongArch::BI__builtin_lsx_vslli_w: + case LoongArch::BI__builtin_lsx_vsrai_w: + case LoongArch::BI__builtin_lsx_vsrari_w: + case LoongArch::BI__builtin_lsx_vsrli_w: + case LoongArch::BI__builtin_lsx_vsllwil_d_w: + case LoongArch::BI__builtin_lsx_vsllwil_du_wu: + case LoongArch::BI__builtin_lsx_vsrlri_w: + case LoongArch::BI__builtin_lsx_vrotri_w: + case LoongArch::BI__builtin_lsx_vsubi_bu: + case LoongArch::BI__builtin_lsx_vsubi_hu: + case LoongArch::BI__builtin_lsx_vbsrl_v: + case LoongArch::BI__builtin_lsx_vbsll_v: + case 
LoongArch::BI__builtin_lsx_vsubi_wu: + case LoongArch::BI__builtin_lsx_vsubi_du: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + case LoongArch::BI__builtin_lsx_vssrarni_h_w: + case LoongArch::BI__builtin_lsx_vssrarni_hu_w: + case LoongArch::BI__builtin_lsx_vssrani_h_w: + case LoongArch::BI__builtin_lsx_vssrani_hu_w: + case LoongArch::BI__builtin_lsx_vsrarni_h_w: + case LoongArch::BI__builtin_lsx_vsrani_h_w: + case LoongArch::BI__builtin_lsx_vfrstpi_b: + case LoongArch::BI__builtin_lsx_vfrstpi_h: + case LoongArch::BI__builtin_lsx_vsrlni_h_w: + case LoongArch::BI__builtin_lsx_vsrlrni_h_w: + case LoongArch::BI__builtin_lsx_vssrlni_h_w: + case LoongArch::BI__builtin_lsx_vssrlni_hu_w: + case LoongArch::BI__builtin_lsx_vssrlrni_h_w: + case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); + case LoongArch::BI__builtin_lsx_vbitclri_d: + case LoongArch::BI__builtin_lsx_vbitrevi_d: + case LoongArch::BI__builtin_lsx_vbitseti_d: + case LoongArch::BI__builtin_lsx_vsat_d: + case LoongArch::BI__builtin_lsx_vsat_du: + case LoongArch::BI__builtin_lsx_vslli_d: + case LoongArch::BI__builtin_lsx_vsrai_d: + case LoongArch::BI__builtin_lsx_vsrli_d: + case LoongArch::BI__builtin_lsx_vsrari_d: + case LoongArch::BI__builtin_lsx_vrotri_d: + case LoongArch::BI__builtin_lsx_vsrlri_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); + case LoongArch::BI__builtin_lsx_vssrarni_w_d: + case LoongArch::BI__builtin_lsx_vssrarni_wu_d: + case LoongArch::BI__builtin_lsx_vssrani_w_d: + case LoongArch::BI__builtin_lsx_vssrani_wu_d: + case LoongArch::BI__builtin_lsx_vsrarni_w_d: + case LoongArch::BI__builtin_lsx_vsrlni_w_d: + case LoongArch::BI__builtin_lsx_vsrlrni_w_d: + case LoongArch::BI__builtin_lsx_vssrlni_w_d: + case LoongArch::BI__builtin_lsx_vssrlni_wu_d: + case LoongArch::BI__builtin_lsx_vssrlrni_w_d: + case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: + case LoongArch::BI__builtin_lsx_vsrani_w_d: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); + case LoongArch::BI__builtin_lsx_vssrarni_d_q: + case LoongArch::BI__builtin_lsx_vssrarni_du_q: + case LoongArch::BI__builtin_lsx_vssrani_d_q: + case LoongArch::BI__builtin_lsx_vssrani_du_q: + case LoongArch::BI__builtin_lsx_vsrarni_d_q: + case LoongArch::BI__builtin_lsx_vssrlni_d_q: + case LoongArch::BI__builtin_lsx_vssrlni_du_q: + case LoongArch::BI__builtin_lsx_vssrlrni_d_q: + case LoongArch::BI__builtin_lsx_vssrlrni_du_q: + case LoongArch::BI__builtin_lsx_vsrani_d_q: + case LoongArch::BI__builtin_lsx_vsrlrni_d_q: + case LoongArch::BI__builtin_lsx_vsrlni_d_q: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); + case LoongArch::BI__builtin_lsx_vseqi_b: + case LoongArch::BI__builtin_lsx_vseqi_h: + case LoongArch::BI__builtin_lsx_vseqi_w: + case LoongArch::BI__builtin_lsx_vseqi_d: + case LoongArch::BI__builtin_lsx_vslti_b: + case LoongArch::BI__builtin_lsx_vslti_h: + case LoongArch::BI__builtin_lsx_vslti_w: + case LoongArch::BI__builtin_lsx_vslti_d: + case LoongArch::BI__builtin_lsx_vslei_b: + case LoongArch::BI__builtin_lsx_vslei_h: + case LoongArch::BI__builtin_lsx_vslei_w: + case LoongArch::BI__builtin_lsx_vslei_d: + case LoongArch::BI__builtin_lsx_vmaxi_b: + case LoongArch::BI__builtin_lsx_vmaxi_h: + case LoongArch::BI__builtin_lsx_vmaxi_w: + case LoongArch::BI__builtin_lsx_vmaxi_d: + case LoongArch::BI__builtin_lsx_vmini_b: + case LoongArch::BI__builtin_lsx_vmini_h: + case LoongArch::BI__builtin_lsx_vmini_w: + case LoongArch::BI__builtin_lsx_vmini_d: + return 
SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); + case LoongArch::BI__builtin_lsx_vandi_b: + case LoongArch::BI__builtin_lsx_vnori_b: + case LoongArch::BI__builtin_lsx_vori_b: + case LoongArch::BI__builtin_lsx_vshuf4i_b: + case LoongArch::BI__builtin_lsx_vshuf4i_h: + case LoongArch::BI__builtin_lsx_vshuf4i_w: + case LoongArch::BI__builtin_lsx_vxori_b: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); + case LoongArch::BI__builtin_lsx_vbitseli_b: + case LoongArch::BI__builtin_lsx_vshuf4i_d: + case LoongArch::BI__builtin_lsx_vextrins_b: + case LoongArch::BI__builtin_lsx_vextrins_h: + case LoongArch::BI__builtin_lsx_vextrins_w: + case LoongArch::BI__builtin_lsx_vextrins_d: + case LoongArch::BI__builtin_lsx_vpermi_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); + case LoongArch::BI__builtin_lsx_vpickve2gr_b: + case LoongArch::BI__builtin_lsx_vpickve2gr_bu: + case LoongArch::BI__builtin_lsx_vreplvei_b: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + case LoongArch::BI__builtin_lsx_vinsgr2vr_b: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + case LoongArch::BI__builtin_lsx_vpickve2gr_h: + case LoongArch::BI__builtin_lsx_vpickve2gr_hu: + case LoongArch::BI__builtin_lsx_vreplvei_h: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + case LoongArch::BI__builtin_lsx_vinsgr2vr_h: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); + case LoongArch::BI__builtin_lsx_vpickve2gr_w: + case LoongArch::BI__builtin_lsx_vpickve2gr_wu: + case LoongArch::BI__builtin_lsx_vreplvei_w: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); + case LoongArch::BI__builtin_lsx_vinsgr2vr_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); + case LoongArch::BI__builtin_lsx_vpickve2gr_d: + case LoongArch::BI__builtin_lsx_vpickve2gr_du: + case LoongArch::BI__builtin_lsx_vreplvei_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); + case LoongArch::BI__builtin_lsx_vinsgr2vr_d: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 1); + case LoongArch::BI__builtin_lsx_vstelm_b: + return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); + case LoongArch::BI__builtin_lsx_vstelm_h: + return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); + case LoongArch::BI__builtin_lsx_vstelm_w: + return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); + case LoongArch::BI__builtin_lsx_vstelm_d: + return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); + case LoongArch::BI__builtin_lsx_vldrepl_b: + case LoongArch::BI__builtin_lsx_vld: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); + case LoongArch::BI__builtin_lsx_vldrepl_h: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); + case LoongArch::BI__builtin_lsx_vldrepl_w: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); + case LoongArch::BI__builtin_lsx_vldrepl_d: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); + case LoongArch::BI__builtin_lsx_vst: + return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); + case LoongArch::BI__builtin_lsx_vldi: + return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); + case LoongArch::BI__builtin_lsx_vrepli_b: + case LoongArch::BI__builtin_lsx_vrepli_h: + case LoongArch::BI__builtin_lsx_vrepli_w: + case LoongArch::BI__builtin_lsx_vrepli_d: + return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); + } return 
false; } From a4005e729c8d9dba9ba19f3ce4ad5b60e64dc467 Mon Sep 17 00:00:00 2001 From: licongtian Date: Wed, 25 Oct 2023 17:44:06 +0800 Subject: [PATCH 123/144] [Clang][LoongArch] Support the builtin functions for LASX This patch does the following work: - Define the builtin functions for LASX - Add the header file lasxintrin.h --- .../include/clang/Basic/BuiltinsLoongArch.def | 3 + .../clang/Basic/BuiltinsLoongArchLASX.def | 982 +++++ clang/lib/Headers/CMakeLists.txt | 1 + clang/lib/Headers/lasxintrin.h | 3860 +++++++++++++++++ clang/lib/Sema/SemaChecking.cpp | 227 + 5 files changed, 5073 insertions(+) create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchLASX.def create mode 100644 clang/lib/Headers/lasxintrin.h diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def index 9ec19c31095aff..95359a3fdc711d 100644 --- a/clang/include/clang/Basic/BuiltinsLoongArch.def +++ b/clang/include/clang/Basic/BuiltinsLoongArch.def @@ -21,5 +21,8 @@ // Definition of LSX builtins. #include "clang/Basic/BuiltinsLoongArchLSX.def" +// Definition of LASX builtins. +#include "clang/Basic/BuiltinsLoongArchLASX.def" + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def new file mode 100644 index 00000000000000..3de200f665b680 --- /dev/null +++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def @@ -0,0 +1,982 @@ +//=BuiltinsLoongArchLASX.def - LoongArch Builtin function database -- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoongArch-specific LASX builtin function database. +// Users of this file must define the BUILTIN macro to make use of this +// information.
+// +//===----------------------------------------------------------------------===// + +TARGET_BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmin_h, 
"V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, 
"V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") + + +TARGET_BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + 
+TARGET_BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_du_wu, "V4LLiV8Si", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc", "lasx") + + +TARGET_BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsllwil_wu_hu, 
"V8UiV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvclo_d, "V4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrintrne_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrintrne_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrintrz_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrintrz_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrintrp_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrintrp_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrintrm_s, "V8SiV8f", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvfrintrm_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", 
"nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_d, 
"V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", 
"nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc", "lasx") + 
+TARGET_BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx") diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index cee61a7e4fb088..8b1e2bc4afa4dc 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -93,6 +93,7 @@ set(hlsl_files set(loongarch_files larchintrin.h + lasxintrin.h lsxintrin.h ) diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h new file mode 100644 index 00000000000000..6b4d5012a24b58 --- /dev/null +++ b/clang/lib/Headers/lasxintrin.h @@ -0,0 +1,3860 @@ +/*===------------ lasxintrin.h - LoongArch LASX intrinsics -----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _LOONGSON_ASXINTRIN_H
+#define _LOONGSON_ASXINTRIN_H 1
+
+#if defined(__loongarch_asx)
+
+typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
+typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1)));
+typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1)));
+typedef short v16i16 __attribute__((vector_size(32), aligned(32)));
+typedef short v16i16_h __attribute__((vector_size(32), aligned(2)));
+typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2)));
+typedef int v8i32 __attribute__((vector_size(32), aligned(32)));
+typedef int v8i32_w __attribute__((vector_size(32), aligned(4)));
+typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4)));
+typedef long long v4i64 __attribute__((vector_size(32), aligned(32)));
+typedef long long v4i64_d __attribute__((vector_size(32), aligned(8)));
+typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8)));
+typedef float v8f32 __attribute__((vector_size(32), aligned(32)));
+typedef float v8f32_w __attribute__((vector_size(32), aligned(4)));
+typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
+typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
+
+typedef float __m256 __attribute__((__vector_size__(32), __may_alias__));
+typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__));
+typedef double __m256d __attribute__((__vector_size__(32), __may_alias__));
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsll_b(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsll_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsll_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsll_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \
+  ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \
+  ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \
+  ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \
+  ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsra_b(__m256i _1, __m256i _2) {
+  return
(__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) + +#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) + +#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) + +#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) + +#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) + +#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) + +#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ + 
((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) + +#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) + +#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) + +#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) + +#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) + +#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_d(__m256i _1, 
__m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) + +#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) + +#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) + +#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) + +#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) + +#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) + +#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) + +#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) + +#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) + +#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_h(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) + +#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) + +#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) + +#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) + +#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) + +#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) + +#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) + +#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) + +#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) + +#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) + +#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) + +#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) + +#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) + +#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) + +#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) + +#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) + +#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ + 
((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) + +#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) + +#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) + +#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) + +#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) + +#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) + +#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) + +#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) + +#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i + __lasx_xvsle_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) + +#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) + +#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) + +#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) + +#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) + +#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) + +#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) + +#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) + +#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) + +#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) + +#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) + +#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) + +#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) + +#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) + +#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_d(__m256i _1, __m256i _2) { + 
return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_b(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_h(__m256i _1, __m256i _2) { + 
return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +extern 
__inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { 
+ return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) + +#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) + +#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) + +#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, 
(v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvand_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvnor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvxor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i + __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); +} + +#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) + +#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) + +#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) + +#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_b(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_h(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_w(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_d(long int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i + __lasx_xvclz_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfadd_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfadd_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfsub_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfsub_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmul_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmul_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfdiv_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfdiv_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { + return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmin_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmin_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmina_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmina_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmax_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmax_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmaxa_s(__m256 _1, __m256 
_2) { + return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfclass_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfclass_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfsqrt_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfsqrt_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrecip_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrecip_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrint_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrint_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrsqrt_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrsqrt_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvflogb_s(__m256 _1) { + return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvflogb_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvth_s_h(__m256i _1) { + return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfcvth_d_s(__m256 _1) { + return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvtl_s_h(__m256i _1) { + return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfcvtl_d_s(__m256 _1) { + return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i + __lasx_xvftint_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_wu_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_lu_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_wu_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_lu_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_w(__m256i _1) { + return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffint_d_l(__m256i _1) { + return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_wu(__m256i _1) { + return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffint_d_lu(__m256i _1) { + return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_b(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_h(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_w(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_d(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); +} + +#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvandn_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_h(__m256i _1) { + return 
(__m256i)__builtin_lasx_xvneg_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) + +#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) + +#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) + +#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) + +#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) + +#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + 
__lasx_xvssran_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_h_w(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) + +#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) + +#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvextrins_h(/*__m256i*/ _1, 
/*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { + 
return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_l(__m256i _1, __m256i _2) { + return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftinth_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffinth_d_w(__m256i _1) { + return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffintl_d_w(__m256i _1) { + return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); +} 
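+
+/* Illustrative sketch, not part of the upstream header: one plausible way
+   the fused multiply-add and float-to-integer conversion intrinsics above
+   compose.  `a', `b' and `c' are hypothetical caller-initialized vectors. */
+/*
+   __m256 a, b, c;                            // eight packed floats each
+   __m256 acc = __lasx_xvfmadd_s(a, b, c);    // per lane: a * b + c
+   __m256i r = __lasx_xvftintrne_w_s(acc);    // to i32, round-to-nearest-even
+   __m256i lo = __lasx_xvftintl_l_s(acc);     // lower float lanes to i64
+*/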
+ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrzh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrzl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrph_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrpl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrmh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrml_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrneh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrnel_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrne_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrne_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrz_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrz_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrp_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrp_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrm_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrm_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); +} + +#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) + +#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) + +#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ 
_2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) + +#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvorn_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); +} + +#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvldx(void const *_1, long int _2) { + return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void + __lasx_xvstx(__m256i _1, void *_2, long int _3) { + return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvextl_qu_du(__m256i _1) { + return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); +} + +#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) + +#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m256i + __lasx_xvreplve0_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_q(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_h_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_w_h(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_w(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_w_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_h(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_hu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_wu_hu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_wu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_wu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_hu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); +} + +#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + 
__lasx_xvperm_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); +} + +#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) + +#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) + +#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) + +#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) + +#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) + +#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) + +#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) + +#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ + ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_b(__m256i 
_1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, 
(v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, 
(v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, + (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, + (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2, + (v4u64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, + (v8u32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, + (v16u16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, + (v32u8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, + 
(v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, + (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, + (v4u64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, + (v8u32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, + (v16u16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, + (v32u8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, + (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, + (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, + (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, + (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, + (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, + (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2, + (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2, + (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + 
__lasx_xvrotr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_q(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_q(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskgez_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsknz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_h_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_w_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_d_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_q_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_hu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_wu_hu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_du_wu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); +} + +extern 
__inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_qu_du(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); +} + +#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) + +#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) + +#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) + +#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvextl_q_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); +} + +#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + 
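+/* Illustrative sketch, not part of the upstream header: the *ni macros in
+   this block shift both operands right by an immediate and pack the narrowed
+   results into a single vector, so the shift amount must be a compile-time
+   constant in the documented range.  `x', `y' and `k' are hypothetical. */
+/*
+   __m256i x, y;                                   // caller-initialized
+   __m256i n = __lasx_xvssrlrni_h_w(x, y, 5);      // ok: 5 is a ui5 literal
+   __m256i bad = __lasx_xvssrlrni_h_w(x, y, k);    // rejected unless k folds
+                                                   // to a constant expression
+*/
+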
+#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ 
_3) \ + ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1))) + +#define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1))) + +#define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1))) + +#define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1))) + +#define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1))) + +#define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1))) + +#define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1))) + +#define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1))) + +#define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1))) + +#define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); +} + 
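+/* Illustrative sketch, not part of the upstream header: each __lasx_xvfcmp_*
+   intrinsic yields a per-lane mask (all ones where the predicate holds, all
+   zeros elsewhere), which composes with the whole-vector tests defined
+   above.  `a' and `b' are hypothetical caller-initialized vectors. */
+/*
+   __m256d a, b;                            // four packed doubles each
+   __m256i m = __lasx_xvfcmp_clt_d(a, b);   // per-lane mask of a < b
+   int any = __lasx_xbnz_v(m);              // nonzero if any mask bit is set
+*/
+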
+extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + 
__lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, 
(v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); +} + +#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ + ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2))) + +#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \ + ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2))) + +#define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1))) + +#define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1))) + +#define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1))) + +#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1))) + +#endif /* defined(__loongarch_asx). */ +#endif /* _LOONGSON_ASXINTRIN_H. */ diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index e131061eb88548..44f698abdb9fe2 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -4184,6 +4184,233 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lsx_vrepli_w: case LoongArch::BI__builtin_lsx_vrepli_d: return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); + + // LASX intrinsics. + case LoongArch::BI__builtin_lasx_xvbitclri_b: + case LoongArch::BI__builtin_lasx_xvbitrevi_b: + case LoongArch::BI__builtin_lasx_xvbitseti_b: + case LoongArch::BI__builtin_lasx_xvsat_b: + case LoongArch::BI__builtin_lasx_xvsat_bu: + case LoongArch::BI__builtin_lasx_xvslli_b: + case LoongArch::BI__builtin_lasx_xvsrai_b: + case LoongArch::BI__builtin_lasx_xvsrari_b: + case LoongArch::BI__builtin_lasx_xvsrli_b: + case LoongArch::BI__builtin_lasx_xvsllwil_h_b: + case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: + case LoongArch::BI__builtin_lasx_xvrotri_b: + case LoongArch::BI__builtin_lasx_xvsrlri_b: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + case LoongArch::BI__builtin_lasx_xvbitclri_h: + case LoongArch::BI__builtin_lasx_xvbitrevi_h: + case LoongArch::BI__builtin_lasx_xvbitseti_h: + case LoongArch::BI__builtin_lasx_xvsat_h: + case LoongArch::BI__builtin_lasx_xvsat_hu: + case LoongArch::BI__builtin_lasx_xvslli_h: + case LoongArch::BI__builtin_lasx_xvsrai_h: + case LoongArch::BI__builtin_lasx_xvsrari_h: + case LoongArch::BI__builtin_lasx_xvsrli_h: + case LoongArch::BI__builtin_lasx_xvsllwil_w_h: + case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: + case LoongArch::BI__builtin_lasx_xvrotri_h: + case LoongArch::BI__builtin_lasx_xvsrlri_h: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + case LoongArch::BI__builtin_lasx_xvssrarni_b_h: + case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: + case LoongArch::BI__builtin_lasx_xvssrani_b_h: + case LoongArch::BI__builtin_lasx_xvssrani_bu_h: + case LoongArch::BI__builtin_lasx_xvsrarni_b_h: + case LoongArch::BI__builtin_lasx_xvsrlni_b_h: + case LoongArch::BI__builtin_lasx_xvsrlrni_b_h: + case LoongArch::BI__builtin_lasx_xvssrlni_b_h: + case LoongArch::BI__builtin_lasx_xvssrlni_bu_h: + case 
LoongArch::BI__builtin_lasx_xvssrlrni_b_h: + case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: + case LoongArch::BI__builtin_lasx_xvsrani_b_h: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + case LoongArch::BI__builtin_lasx_xvslei_bu: + case LoongArch::BI__builtin_lasx_xvslei_hu: + case LoongArch::BI__builtin_lasx_xvslei_wu: + case LoongArch::BI__builtin_lasx_xvslei_du: + case LoongArch::BI__builtin_lasx_xvslti_bu: + case LoongArch::BI__builtin_lasx_xvslti_hu: + case LoongArch::BI__builtin_lasx_xvslti_wu: + case LoongArch::BI__builtin_lasx_xvslti_du: + case LoongArch::BI__builtin_lasx_xvmaxi_bu: + case LoongArch::BI__builtin_lasx_xvmaxi_hu: + case LoongArch::BI__builtin_lasx_xvmaxi_wu: + case LoongArch::BI__builtin_lasx_xvmaxi_du: + case LoongArch::BI__builtin_lasx_xvmini_bu: + case LoongArch::BI__builtin_lasx_xvmini_hu: + case LoongArch::BI__builtin_lasx_xvmini_wu: + case LoongArch::BI__builtin_lasx_xvmini_du: + case LoongArch::BI__builtin_lasx_xvaddi_bu: + case LoongArch::BI__builtin_lasx_xvaddi_hu: + case LoongArch::BI__builtin_lasx_xvaddi_wu: + case LoongArch::BI__builtin_lasx_xvaddi_du: + case LoongArch::BI__builtin_lasx_xvbitclri_w: + case LoongArch::BI__builtin_lasx_xvbitrevi_w: + case LoongArch::BI__builtin_lasx_xvbitseti_w: + case LoongArch::BI__builtin_lasx_xvsat_w: + case LoongArch::BI__builtin_lasx_xvsat_wu: + case LoongArch::BI__builtin_lasx_xvslli_w: + case LoongArch::BI__builtin_lasx_xvsrai_w: + case LoongArch::BI__builtin_lasx_xvsrari_w: + case LoongArch::BI__builtin_lasx_xvsrli_w: + case LoongArch::BI__builtin_lasx_xvsllwil_d_w: + case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: + case LoongArch::BI__builtin_lasx_xvsrlri_w: + case LoongArch::BI__builtin_lasx_xvrotri_w: + case LoongArch::BI__builtin_lasx_xvsubi_bu: + case LoongArch::BI__builtin_lasx_xvsubi_hu: + case LoongArch::BI__builtin_lasx_xvsubi_wu: + case LoongArch::BI__builtin_lasx_xvsubi_du: + case LoongArch::BI__builtin_lasx_xvbsrl_v: + case LoongArch::BI__builtin_lasx_xvbsll_v: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + case LoongArch::BI__builtin_lasx_xvssrarni_h_w: + case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: + case LoongArch::BI__builtin_lasx_xvssrani_h_w: + case LoongArch::BI__builtin_lasx_xvssrani_hu_w: + case LoongArch::BI__builtin_lasx_xvsrarni_h_w: + case LoongArch::BI__builtin_lasx_xvsrani_h_w: + case LoongArch::BI__builtin_lasx_xvfrstpi_b: + case LoongArch::BI__builtin_lasx_xvfrstpi_h: + case LoongArch::BI__builtin_lasx_xvsrlni_h_w: + case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: + case LoongArch::BI__builtin_lasx_xvssrlni_h_w: + case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: + case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: + case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); + case LoongArch::BI__builtin_lasx_xvbitclri_d: + case LoongArch::BI__builtin_lasx_xvbitrevi_d: + case LoongArch::BI__builtin_lasx_xvbitseti_d: + case LoongArch::BI__builtin_lasx_xvsat_d: + case LoongArch::BI__builtin_lasx_xvsat_du: + case LoongArch::BI__builtin_lasx_xvslli_d: + case LoongArch::BI__builtin_lasx_xvsrai_d: + case LoongArch::BI__builtin_lasx_xvsrli_d: + case LoongArch::BI__builtin_lasx_xvsrari_d: + case LoongArch::BI__builtin_lasx_xvrotri_d: + case LoongArch::BI__builtin_lasx_xvsrlri_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); + case LoongArch::BI__builtin_lasx_xvssrarni_w_d: + case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: + case LoongArch::BI__builtin_lasx_xvssrani_w_d: + case 
LoongArch::BI__builtin_lasx_xvssrani_wu_d: + case LoongArch::BI__builtin_lasx_xvsrarni_w_d: + case LoongArch::BI__builtin_lasx_xvsrlni_w_d: + case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: + case LoongArch::BI__builtin_lasx_xvssrlni_w_d: + case LoongArch::BI__builtin_lasx_xvssrlni_wu_d: + case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: + case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: + case LoongArch::BI__builtin_lasx_xvsrani_w_d: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); + case LoongArch::BI__builtin_lasx_xvssrarni_d_q: + case LoongArch::BI__builtin_lasx_xvssrarni_du_q: + case LoongArch::BI__builtin_lasx_xvssrani_d_q: + case LoongArch::BI__builtin_lasx_xvssrani_du_q: + case LoongArch::BI__builtin_lasx_xvsrarni_d_q: + case LoongArch::BI__builtin_lasx_xvssrlni_d_q: + case LoongArch::BI__builtin_lasx_xvssrlni_du_q: + case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: + case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: + case LoongArch::BI__builtin_lasx_xvsrani_d_q: + case LoongArch::BI__builtin_lasx_xvsrlni_d_q: + case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); + case LoongArch::BI__builtin_lasx_xvseqi_b: + case LoongArch::BI__builtin_lasx_xvseqi_h: + case LoongArch::BI__builtin_lasx_xvseqi_w: + case LoongArch::BI__builtin_lasx_xvseqi_d: + case LoongArch::BI__builtin_lasx_xvslti_b: + case LoongArch::BI__builtin_lasx_xvslti_h: + case LoongArch::BI__builtin_lasx_xvslti_w: + case LoongArch::BI__builtin_lasx_xvslti_d: + case LoongArch::BI__builtin_lasx_xvslei_b: + case LoongArch::BI__builtin_lasx_xvslei_h: + case LoongArch::BI__builtin_lasx_xvslei_w: + case LoongArch::BI__builtin_lasx_xvslei_d: + case LoongArch::BI__builtin_lasx_xvmaxi_b: + case LoongArch::BI__builtin_lasx_xvmaxi_h: + case LoongArch::BI__builtin_lasx_xvmaxi_w: + case LoongArch::BI__builtin_lasx_xvmaxi_d: + case LoongArch::BI__builtin_lasx_xvmini_b: + case LoongArch::BI__builtin_lasx_xvmini_h: + case LoongArch::BI__builtin_lasx_xvmini_w: + case LoongArch::BI__builtin_lasx_xvmini_d: + return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); + case LoongArch::BI__builtin_lasx_xvandi_b: + case LoongArch::BI__builtin_lasx_xvnori_b: + case LoongArch::BI__builtin_lasx_xvori_b: + case LoongArch::BI__builtin_lasx_xvshuf4i_b: + case LoongArch::BI__builtin_lasx_xvshuf4i_h: + case LoongArch::BI__builtin_lasx_xvshuf4i_w: + case LoongArch::BI__builtin_lasx_xvxori_b: + case LoongArch::BI__builtin_lasx_xvpermi_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); + case LoongArch::BI__builtin_lasx_xvbitseli_b: + case LoongArch::BI__builtin_lasx_xvshuf4i_d: + case LoongArch::BI__builtin_lasx_xvextrins_b: + case LoongArch::BI__builtin_lasx_xvextrins_h: + case LoongArch::BI__builtin_lasx_xvextrins_w: + case LoongArch::BI__builtin_lasx_xvextrins_d: + case LoongArch::BI__builtin_lasx_xvpermi_q: + case LoongArch::BI__builtin_lasx_xvpermi_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); + case LoongArch::BI__builtin_lasx_xvrepl128vei_b: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + case LoongArch::BI__builtin_lasx_xvrepl128vei_h: + case LoongArch::BI__builtin_lasx_xvpickve2gr_w: + case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: + case LoongArch::BI__builtin_lasx_xvpickve_w_f: + case LoongArch::BI__builtin_lasx_xvpickve_w: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: + case LoongArch::BI__builtin_lasx_xvinsve0_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); + case 
LoongArch::BI__builtin_lasx_xvrepl128vei_w: + case LoongArch::BI__builtin_lasx_xvpickve2gr_d: + case LoongArch::BI__builtin_lasx_xvpickve2gr_du: + case LoongArch::BI__builtin_lasx_xvpickve_d_f: + case LoongArch::BI__builtin_lasx_xvpickve_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); + case LoongArch::BI__builtin_lasx_xvinsve0_d: + case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); + case LoongArch::BI__builtin_lasx_xvstelm_b: + return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); + case LoongArch::BI__builtin_lasx_xvstelm_h: + return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); + case LoongArch::BI__builtin_lasx_xvstelm_w: + return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); + case LoongArch::BI__builtin_lasx_xvstelm_d: + return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); + case LoongArch::BI__builtin_lasx_xvrepl128vei_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); + case LoongArch::BI__builtin_lasx_xvldrepl_b: + case LoongArch::BI__builtin_lasx_xvld: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); + case LoongArch::BI__builtin_lasx_xvldrepl_h: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); + case LoongArch::BI__builtin_lasx_xvldrepl_w: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); + case LoongArch::BI__builtin_lasx_xvldrepl_d: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); + case LoongArch::BI__builtin_lasx_xvst: + return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); + case LoongArch::BI__builtin_lasx_xvldi: + return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); + case LoongArch::BI__builtin_lasx_xvrepli_b: + case LoongArch::BI__builtin_lasx_xvrepli_h: + case LoongArch::BI__builtin_lasx_xvrepli_w: + case LoongArch::BI__builtin_lasx_xvrepli_d: + return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); } return false; } From 673c530837faa5ddb45769ddee01d09e1f73d406 Mon Sep 17 00:00:00 2001 From: chenli Date: Fri, 27 Oct 2023 15:57:30 +0800 Subject: [PATCH 124/144] [LoongArch][CodeGen] Add LSX builtin testcases --- .../LoongArch/lsx/builtin-alias-error.c | 1359 +++++ .../CodeGen/LoongArch/lsx/builtin-alias.c | 4451 ++++++++++++++ .../CodeGen/LoongArch/lsx/builtin-error.c | 1382 +++++ clang/test/CodeGen/LoongArch/lsx/builtin.c | 5193 +++++++++++++++++ 4 files changed, 12385 insertions(+) create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-alias.c create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-error.c create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin.c diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c new file mode 100644 index 00000000000000..69cf2254fdd797 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c @@ -0,0 +1,1359 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s + +#include <lsxintrin.h> + +v16i8 vslli_b(v16i8 _1, int var) { + v16i8 res = __lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vslli_b(_1, var); // 
expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} + return res; +} + +v8i16 vslli_h(v8i16 _1, int var) { + v8i16 res = __lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} + return res; +} + +v4i32 vslli_w(v4i32 _1, int var) { + v4i32 res = __lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} + return res; +} + +v2i64 vslli_d(v2i64 _1, int var) { + v2i64 res = __lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} + return res; +} + +v16i8 vsrai_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} + return res; +} + +v8i16 vsrai_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} + return res; +} + +v4i32 vsrai_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} + return res; +} + +v2i64 vsrai_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} + return res; +} + +v16i8 vsrari_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} + return res; +} + +v8i16 vsrari_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 
[0, 15]}} + res |= __lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} + return res; +} + +v4i32 vsrari_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} + return res; +} + +v2i64 vsrari_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} + return res; +} + +v16i8 vsrli_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} + return res; +} + +v8i16 vsrli_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} + return res; +} + +v4i32 vsrli_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} + return res; +} + +v2i64 vsrli_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} + return res; +} + +v16i8 vsrlri_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} + return res; +} + +v8i16 vsrlri_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} + return res; +} + +v4i32 vsrlri_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrlri_w(_1, 32); // 
expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} + return res; +} + +v2i64 vsrlri_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} + return res; +} + +v16u8 vbitclri_b(v16u8 _1, int var) { + v16u8 res = __lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} + return res; +} + +v8u16 vbitclri_h(v8u16 _1, int var) { + v8u16 res = __lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} + return res; +} + +v4u32 vbitclri_w(v4u32 _1, int var) { + v4u32 res = __lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} + return res; +} + +v2u64 vbitclri_d(v2u64 _1, int var) { + v2u64 res = __lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} + return res; +} + +v16u8 vbitseti_b(v16u8 _1, int var) { + v16u8 res = __lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} + return res; +} + +v8u16 vbitseti_h(v8u16 _1, int var) { + v8u16 res = __lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} + return res; +} + +v4u32 vbitseti_w(v4u32 _1, int var) { + v4u32 res = __lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} + return res; +} + +v2u64 vbitseti_d(v2u64 _1, int var) { + 
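+  /* Illustrative aside (a sketch, not part of the checked test; `v` is a
+     hypothetical v2u64 value): for the vbitclri/vbitseti/vbitrevi family the
+     immediate names a bit position inside each element, so the accepted
+     range is [0, element_bits - 1]:
+       v2u64 ok = __lsx_vbitseti_d(v, 63); // highest valid bit index, accepted
+       v2u64 ko = __lsx_vbitseti_d(v, 64); // one past the MSB, diagnosed
+     The statements below probe exactly these boundaries with -1, 64 and a
+     non-constant argument. */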
v2u64 res = __lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} + return res; +} + +v16u8 vbitrevi_b(v16u8 _1, int var) { + v16u8 res = __lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} + return res; +} + +v8u16 vbitrevi_h(v8u16 _1, int var) { + v8u16 res = __lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} + return res; +} + +v4u32 vbitrevi_w(v4u32 _1, int var) { + v4u32 res = __lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} + return res; +} + +v2u64 vbitrevi_d(v2u64 _1, int var) { + v2u64 res = __lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} + return res; +} + +v16i8 vaddi_bu(v16i8 _1, int var) { + v16i8 res = __lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} + return res; +} + +v8i16 vaddi_hu(v8i16 _1, int var) { + v8i16 res = __lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} + return res; +} + +v4i32 vaddi_wu(v4i32 _1, int var) { + v4i32 res = __lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} + return res; +} + +v2i64 vaddi_du(v2i64 _1, int var) { + v2i64 res = __lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_du(_1, var); // 
expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} + return res; +} + +v16i8 vsubi_bu(v16i8 _1, int var) { + v16i8 res = __lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} + return res; +} + +v8i16 vsubi_hu(v8i16 _1, int var) { + v8i16 res = __lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} + return res; +} + +v4i32 vsubi_wu(v4i32 _1, int var) { + v4i32 res = __lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} + return res; +} + +v2i64 vsubi_du(v2i64 _1, int var) { + v2i64 res = __lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} + return res; +} + +v16i8 vmaxi_b(v16i8 _1, int var) { + v16i8 res = __lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} + return res; +} + +v8i16 vmaxi_h(v8i16 _1, int var) { + v8i16 res = __lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} + return res; +} + +v4i32 vmaxi_w(v4i32 _1, int var) { + v4i32 res = __lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} + return res; +} + +v2i64 vmaxi_d(v2i64 _1, int var) { + v2i64 res = __lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} + return res; +} + +v16u8 vmaxi_bu(v16u8 _1, int var) { + v16u8 res = __lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside 
the valid range [0, 31]}} + res |= __lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} + return res; +} + +v8u16 vmaxi_hu(v8u16 _1, int var) { + v8u16 res = __lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} + return res; +} + +v4u32 vmaxi_wu(v4u32 _1, int var) { + v4u32 res = __lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} + return res; +} + +v2u64 vmaxi_du(v2u64 _1, int var) { + v2u64 res = __lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} + return res; +} + +v16i8 vmini_b(v16i8 _1, int var) { + v16i8 res = __lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}} + return res; +} + +v8i16 vmini_h(v8i16 _1, int var) { + v8i16 res = __lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}} + return res; +} + +v4i32 vmini_w(v4i32 _1, int var) { + v4i32 res = __lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} + return res; +} + +v2i64 vmini_d(v2i64 _1, int var) { + v2i64 res = __lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} + return res; +} + +v16u8 vmini_bu(v16u8 _1, int var) { + v16u8 res = __lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} + return res; +} + +v8u16 vmini_hu(v8u16 _1, int var) { + v8u16 res = __lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= 
__lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} + return res; +} + +v4u32 vmini_wu(v4u32 _1, int var) { + v4u32 res = __lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} + return res; +} + +v2u64 vmini_du(v2u64 _1, int var) { + v2u64 res = __lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} + return res; +} + +v16i8 vseqi_b(v16i8 _1, int var) { + v16i8 res = __lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} + return res; +} + +v8i16 vseqi_h(v8i16 _1, int var) { + v8i16 res = __lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} + return res; +} + +v4i32 vseqi_w(v4i32 _1, int var) { + v4i32 res = __lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} + return res; +} + +v2i64 vseqi_d(v2i64 _1, int var) { + v2i64 res = __lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} + return res; +} + +v16i8 vslti_b(v16i8 _1, int var) { + v16i8 res = __lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} + return res; +} + +v8i16 vslti_h(v8i16 _1, int var) { + v8i16 res = __lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} + return res; +} + +v4i32 vslti_w(v4i32 _1, int var) { + v4i32 res = __lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is 
outside the valid range [-16, 15]}} + res |= __lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} + return res; +} + +v2i64 vslti_d(v2i64 _1, int var) { + v2i64 res = __lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} + return res; +} + +v16i8 vslti_bu(v16u8 _1, int var) { + v16i8 res = __lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} + return res; +} + +v8i16 vslti_hu(v8u16 _1, int var) { + v8i16 res = __lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} + return res; +} + +v4i32 vslti_wu(v4u32 _1, int var) { + v4i32 res = __lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} + return res; +} + +v2i64 vslti_du(v2u64 _1, int var) { + v2i64 res = __lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} + return res; +} + +v16i8 vslei_b(v16i8 _1, int var) { + v16i8 res = __lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} + return res; +} + +v8i16 vslei_h(v8i16 _1, int var) { + v8i16 res = __lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} + return res; +} + +v4i32 vslei_w(v4i32 _1, int var) { + v4i32 res = __lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} + return res; +} + +v2i64 vslei_d(v2i64 _1, int var) { + v2i64 res = 
__lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} + return res; +} + +v16i8 vslei_bu(v16u8 _1, int var) { + v16i8 res = __lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} + return res; +} + +v8i16 vslei_hu(v8u16 _1, int var) { + v8i16 res = __lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} + return res; +} + +v4i32 vslei_wu(v4u32 _1, int var) { + v4i32 res = __lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} + return res; +} + +v2i64 vslei_du(v2u64 _1, int var) { + v2i64 res = __lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} + return res; +} + +v16i8 vsat_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} + return res; +} + +v8i16 vsat_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} + return res; +} + +v4i32 vsat_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} + return res; +} + +v2i64 vsat_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} + return res; +} + +v16u8 
vsat_bu(v16u8 _1, int var) { + v16u8 res = __lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} + return res; +} + +v8u16 vsat_hu(v8u16 _1, int var) { + v8u16 res = __lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} + return res; +} + +v4u32 vsat_wu(v4u32 _1, int var) { + v4u32 res = __lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} + return res; +} + +v2u64 vsat_du(v2u64 _1, int var) { + v2u64 res = __lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} + return res; +} + +v16i8 vreplvei_b(v16i8 _1, int var) { + v16i8 res = __lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} + return res; +} + +v8i16 vreplvei_h(v8i16 _1, int var) { + v8i16 res = __lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} + return res; +} + +v4i32 vreplvei_w(v4i32 _1, int var) { + v4i32 res = __lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} + return res; +} + +v2i64 vreplvei_d(v2i64 _1, int var) { + v2i64 res = __lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} + return res; +} + +v16u8 vandi_b(v16u8 _1, int var) { + v16u8 res = __lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vandi_b(_1, var); // expected-error 
{{argument to '__builtin_lsx_vandi_b' must be a constant integer}} + return res; +} + +v16u8 vori_b(v16u8 _1, int var) { + v16u8 res = __lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} + return res; +} + +v16u8 vnori_b(v16u8 _1, int var) { + v16u8 res = __lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} + return res; +} + +v16u8 vxori_b(v16u8 _1, int var) { + v16u8 res = __lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} + return res; +} + +v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { + v16u8 res = __lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} + return res; +} + +v16i8 vshuf4i_b(v16i8 _1, int var) { + v16i8 res = __lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} + return res; +} + +v8i16 vshuf4i_h(v8i16 _1, int var) { + v8i16 res = __lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} + return res; +} + +v4i32 vshuf4i_w(v4i32 _1, int var) { + v4i32 res = __lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} + return res; +} + +int vpickve2gr_b(v16i8 _1, int var) { + int res = __lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} + return res; +} + +int vpickve2gr_h(v8i16 _1, int var) { + int res = __lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 
7]}} + res |= __lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} + return res; +} + +int vpickve2gr_w(v4i32 _1, int var) { + int res = __lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} + return res; +} + +long vpickve2gr_d(v2i64 _1, int var) { + long res = __lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_bu(v16i8 _1, int var) { + unsigned int res = __lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_hu(v8i16 _1, int var) { + unsigned int res = __lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_wu(v4i32 _1, int var) { + unsigned int res = __lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} + return res; +} + +unsigned long int vpickve2gr_du(v2i64 _1, int var) { + unsigned long int res = __lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} + return res; +} + +v16i8 vinsgr2vr_b(v16i8 _1, int var) { + v16i8 res = __lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} + return res; +} + +v8i16 vinsgr2vr_h(v8i16 _1, int var) { + v8i16 res = __lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside 
the valid range [0, 7]}} + res |= __lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} + return res; +} + +v4i32 vinsgr2vr_w(v4i32 _1, int var) { + v4i32 res = __lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} + return res; +} + +v2i64 vinsgr2vr_d(v2i64 _1, int var) { + v2i64 res = __lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} + return res; +} + +v8i16 vsllwil_h_b(v16i8 _1, int var) { + v8i16 res = __lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} + return res; +} + +v4i32 vsllwil_w_h(v8i16 _1, int var) { + v4i32 res = __lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} + return res; +} + +v2i64 vsllwil_d_w(v4i32 _1, int var) { + v2i64 res = __lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} + return res; +} + +v8u16 vsllwil_hu_bu(v16u8 _1, int var) { + v8u16 res = __lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} + return res; +} + +v4u32 vsllwil_wu_hu(v8u16 _1, int var) { + v4u32 res = __lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} + return res; +} + +v2u64 vsllwil_du_wu(v4u32 _1, int var) { + v2u64 res = __lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} + return 
res;
+}
+
+v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}}
+  return res;
+}
+
+v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}}
+  return res;
+}
+
+v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}}
+  return res;
+}
+
+v16i8 vbsrl_v(v16i8 _1, int var) {
+  v16i8 res = __lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}}
+  return res;
+}
+
+v16i8 vbsll_v(v16i8 _1, int var) {
+  v16i8 res = __lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}}
+  return res;
+}
+
+v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}}
+  return res;
+}
+
+v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}}
+  return res;
+}
+
+v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}}
+  return res;
+}
+
+v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}}
+  return res;
+}
+
+void vstelm_b_idx(v16i8 _1, void *_2, int var) {
+  __lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  __lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  __lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}}
+}
+
+void vstelm_h_idx(v8i16 _1, void *_2, int var) {
+  __lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  __lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  __lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}}
+}
+
+void vstelm_w_idx(v4i32 _1, void *_2, int var) {
+  __lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
+  __lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  __lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}}
+}
+
+void vstelm_d_idx(v2i64 _1, void *_2, int var) {
+  __lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}}
+  __lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+  __lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}}
+}
+
+void vstelm_b(v16i8 _1, void *_2, int var) {
+  __lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}}
+  __lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}}
+  __lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}}
+}
+
+void vstelm_h(v8i16 _1, void *_2, int var) {
+  __lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}}
+  __lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}}
+  __lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}}
+}
+
+void vstelm_w(v4i32 _1, void *_2, int var) {
+  __lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}}
+  __lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}}
+  __lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}}
+}
+
+void vstelm_d(v2i64 _1, void *_2, int var) {
+  __lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}}
+  __lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}}
+  __lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}}
+}
+
+v16i8 vldrepl_b(void *_1, int var) {
+  v16i8 res = __lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
+  res |= __lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
+  res |= __lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}}
+  return res;
+}
+
+v8i16 vldrepl_h(void *_1, int var) {
+  v8i16 res = __lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}}
+  res |= __lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}}
+  res |= __lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}}
+  return res;
+}
+
+v4i32 vldrepl_w(void *_1, int var) {
+  v4i32 res = __lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}}
+  res |= __lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}}
+  res |= __lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}}
+  return res;
+}
+
+v2i64 vldrepl_d(void *_1, int var) {
+  v2i64 res = __lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}}
+  res |= __lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}}
+  res |= __lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}}
+  return res;
+}
+
+v16i8 vrotri_b(v16i8 _1, int var) {
+  v16i8 res = __lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}}
+  return res;
+}
+
+v8i16 vrotri_h(v8i16 _1, int var) {
+  v8i16 res = __lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}}
+  return res;
+}
+
+v4i32 vrotri_w(v4i32 _1, int var) {
+  v4i32 res = __lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}}
+  return res;
+}
+
+v2i64 vrotri_d(v2i64 _1, int var) {
+  v2i64 res = __lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}}
+  return res;
+}
+
+v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) {
+  v16u8 res = __lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}}
+  return res;
+}
+
+v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) {
+  v8u16 res = __lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}}
+  return res;
+}
+
+v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) {
+  v4u32 res = __lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}}
+  return res;
+}
+
+v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) {
+  v2u64 res = __lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) {
+  v16u8 res = __lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}}
+  return res;
+}
+
+v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) {
+  v8u16 res = __lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}}
+  return res;
+}
+
+v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) {
+  v4u32 res = __lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}}
+  return res;
+}
+
+v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) {
+  v2u64 res = __lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}}
+  return res;
+}
+
+v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) {
+  v16u8 res = __lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}}
+  return res;
+}
+
+v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) {
+  v8u16 res = __lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}}
+  return res;
+}
+
+v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) {
+  v4u32 res = __lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}}
+  return res;
+}
+
+v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) {
+  v2u64 res = __lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) {
+  v16u8 res = __lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}}
+  return res;
+}
+
+v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) {
+  v8u16 res = __lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}}
+  return res;
+}
+
+v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) {
+  v4u32 res = __lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}}
+  return res;
+}
+
+v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) {
+  v2u64 res = __lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}}
+  return res;
+}
+
+v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}}
+  return res;
+}
+
+v16i8 vld(void *_1, int var) {
+  v16i8 res = __lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
+  res |= __lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
+  res |= __lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}}
+  return res;
+}
+
+void vst(v16i8 _1, void *_2, int var) {
+  __lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
+  __lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
+  __lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}}
+}
+
+v2i64 vldi(int var) {
+  v2i64 res = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}}
+  res |= __lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}}
+  res |= __lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}}
+  return res;
+}
+
+v16i8 vrepli_b(int var) {
+  v16i8 res = __lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}}
+  return res;
+}
+
+v2i64 vrepli_d(int var) {
+  v2i64 res = __lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}}
+  return res;
+}
+
+v8i16 vrepli_h(int var) {
+  v8i16 res = __lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}}
+  return res;
+}
+
+v4i32 vrepli_w(int var) {
+  v4i32 res = __lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}}
+  return res;
+}
diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c
new file mode 100644
index 00000000000000..331e29fb7d17f7
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c
@@ -0,0 +1,4451 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s
+
+#include <lsxintrin.h>
+
+// CHECK-LABEL: @vsll_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __lsx_vsll_b(_1, _2); }
+// CHECK-LABEL: @vsll_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); }
+// CHECK-LABEL: @vsll_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); }
+// CHECK-LABEL: @vsll_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); }
+// CHECK-LABEL: @vslli_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); }
+// CHECK-LABEL: @vslli_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); }
+// CHECK-LABEL: @vslli_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); }
+// CHECK-LABEL: @vslli_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); }
+// CHECK-LABEL: @vsra_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); }
+// CHECK-LABEL: @vsra_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); }
+// CHECK-LABEL: @vsra_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); }
+// CHECK-LABEL: @vsra_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); }
+// CHECK-LABEL: @vsrai_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); }
+// CHECK-LABEL: @vsrai_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); }
+// CHECK-LABEL: @vsrai_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); }
+// CHECK-LABEL: @vsrai_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); }
+// CHECK-LABEL: @vsrar_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); }
+// CHECK-LABEL: @vsrar_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); }
+// CHECK-LABEL: @vsrar_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); }
+// CHECK-LABEL: @vsrar_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); }
+// CHECK-LABEL: @vsrari_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); }
+// CHECK-LABEL: @vsrari_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); }
+// CHECK-LABEL: @vsrari_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); }
+// CHECK-LABEL: @vsrari_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); }
+// CHECK-LABEL: @vsrl_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); }
+// CHECK-LABEL: @vsrl_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); }
+// CHECK-LABEL: @vsrl_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); }
+// CHECK-LABEL: @vsrl_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); }
+// CHECK-LABEL: @vsrli_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); }
+// CHECK-LABEL: @vsrli_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); }
+// CHECK-LABEL: @vsrli_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); }
+// CHECK-LABEL: @vsrli_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); }
+// CHECK-LABEL: @vsrlr_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); }
+// CHECK-LABEL: @vsrlr_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); }
+// CHECK-LABEL: @vsrlr_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); }
+// CHECK-LABEL: @vsrlr_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); }
+// CHECK-LABEL: @vsrlri_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); }
+// CHECK-LABEL: @vsrlri_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); }
+// CHECK-LABEL: @vsrlri_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); }
+// CHECK-LABEL: @vsrlri_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); }
+// CHECK-LABEL: @vbitclr_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); }
+// CHECK-LABEL: @vbitclr_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); }
+// CHECK-LABEL: @vbitclr_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); }
+// CHECK-LABEL: @vbitclr_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); }
+// CHECK-LABEL: @vbitclri_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); }
+// CHECK-LABEL: @vbitclri_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); }
+// CHECK-LABEL: @vbitclri_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vbitclri_w(v4u32 _1) { return __lsx_vbitclri_w(_1, 1); }
+// CHECK-LABEL: @vbitclri_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); }
+// CHECK-LABEL: @vbitset_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); }
+// CHECK-LABEL: @vbitset_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); }
+// CHECK-LABEL: @vbitset_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); }
+// CHECK-LABEL: @vbitset_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); }
+// CHECK-LABEL: @vbitseti_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); }
+// CHECK-LABEL: @vbitseti_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); }
+// CHECK-LABEL: @vbitseti_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); }
+// CHECK-LABEL: @vbitseti_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); }
+// CHECK-LABEL: @vbitrev_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); }
+// CHECK-LABEL: @vbitrev_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); }
+// CHECK-LABEL: @vbitrev_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); }
+// CHECK-LABEL: @vbitrev_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); }
+// CHECK-LABEL: @vbitrevi_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); }
+// CHECK-LABEL: @vbitrevi_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); }
+// CHECK-LABEL: @vbitrevi_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); }
+// CHECK-LABEL: @vbitrevi_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); }
+// CHECK-LABEL: @vadd_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); }
+// CHECK-LABEL: @vadd_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); }
+// CHECK-LABEL: @vadd_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); }
+// CHECK-LABEL: @vadd_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); }
+// CHECK-LABEL: @vaddi_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); }
+// CHECK-LABEL: @vaddi_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddi_hu(v8i16 _1) { return __lsx_vaddi_hu(_1, 1); }
+// CHECK-LABEL: @vaddi_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); }
+// CHECK-LABEL: @vaddi_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); }
+// CHECK-LABEL: @vsub_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); }
+// CHECK-LABEL: @vsub_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); }
+// CHECK-LABEL: @vsub_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); }
+// CHECK-LABEL: @vsub_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __lsx_vsub_d(_1, _2); }
+// CHECK-LABEL: @vsubi_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); }
+// CHECK-LABEL: @vsubi_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); }
+// CHECK-LABEL: @vsubi_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); }
+// CHECK-LABEL: @vsubi_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); }
+// CHECK-LABEL: @vmax_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); }
+// CHECK-LABEL: @vmax_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); }
+// CHECK-LABEL: @vmax_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); }
+// CHECK-LABEL: @vmax_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); }
+// CHECK-LABEL: @vmaxi_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); }
+// CHECK-LABEL: @vmaxi_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); }
+// CHECK-LABEL: @vmaxi_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmaxi_w(v4i32 _1) { return __lsx_vmaxi_w(_1, 1); }
+// CHECK-LABEL: @vmaxi_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); }
+// CHECK-LABEL: @vmax_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); }
+// CHECK-LABEL: @vmax_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); }
+// CHECK-LABEL: @vmax_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); }
+// CHECK-LABEL: @vmax_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); }
+// CHECK-LABEL: @vmaxi_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); }
+// CHECK-LABEL: @vmaxi_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); }
+// CHECK-LABEL: @vmaxi_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); }
+// CHECK-LABEL: @vmaxi_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); }
+// CHECK-LABEL: @vmin_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __lsx_vmin_b(_1, _2); }
+// CHECK-LABEL: @vmin_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); }
+// CHECK-LABEL: @vmin_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); }
+// CHECK-LABEL: @vmin_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); }
+// CHECK-LABEL: @vmini_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); }
+// CHECK-LABEL: @vmini_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); }
+// CHECK-LABEL: @vmini_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); }
+// CHECK-LABEL: @vmini_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); }
+// CHECK-LABEL: @vmin_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); }
+// CHECK-LABEL: @vmin_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); }
+// CHECK-LABEL: @vmin_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); }
+// CHECK-LABEL: @vmin_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); }
+// CHECK-LABEL: @vmini_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); }
+// CHECK-LABEL: @vmini_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); }
+// CHECK-LABEL: @vmini_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); }
+// CHECK-LABEL: @vmini_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); }
+// CHECK-LABEL: @vseq_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); }
+// CHECK-LABEL: @vseq_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); }
+// CHECK-LABEL: @vseq_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); }
+// CHECK-LABEL: @vseq_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __lsx_vseq_d(_1, _2); }
+// CHECK-LABEL: @vseqi_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); }
+// CHECK-LABEL: @vseqi_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:
[[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); } +// CHECK-LABEL: @vseqi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); } +// CHECK-LABEL: @vseqi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); } +// CHECK-LABEL: @vslti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); } +// CHECK-LABEL: @vslt_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); } +// CHECK-LABEL: @vslt_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); } +// CHECK-LABEL: @vslt_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); } +// CHECK-LABEL: @vslt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); } +// CHECK-LABEL: @vslti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); } +// CHECK-LABEL: @vslti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vslti_w(v4i32 _1) { return __lsx_vslti_w(_1, 1); } +// CHECK-LABEL: @vslti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); } +// CHECK-LABEL: @vslt_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); } +// CHECK-LABEL: @vslt_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); } +// CHECK-LABEL: @vslt_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vslt.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); } +// CHECK-LABEL: @vslt_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); } +// CHECK-LABEL: @vslti_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); } +// CHECK-LABEL: @vslti_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); } +// CHECK-LABEL: @vslti_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); } +// CHECK-LABEL: @vslti_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); } +// CHECK-LABEL: @vsle_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); } +// CHECK-LABEL: @vsle_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); } +// CHECK-LABEL: @vsle_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); } +// CHECK-LABEL: @vsle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); } +// CHECK-LABEL: @vslei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); } +// CHECK-LABEL: @vslei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vslei_h(v8i16 _1) { return __lsx_vslei_h(_1, 1); } +// CHECK-LABEL: @vslei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); } +// CHECK-LABEL: @vslei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 
x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); } +// CHECK-LABEL: @vsle_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); } +// CHECK-LABEL: @vsle_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); } +// CHECK-LABEL: @vsle_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); } +// CHECK-LABEL: @vsle_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); } +// CHECK-LABEL: @vslei_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); } +// CHECK-LABEL: @vslei_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); } +// CHECK-LABEL: @vslei_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); } +// CHECK-LABEL: @vslei_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); } +// CHECK-LABEL: @vsat_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); } +// CHECK-LABEL: @vsat_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); } +// CHECK-LABEL: @vsat_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); } +// CHECK-LABEL: @vsat_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); } +// CHECK-LABEL: @vsat_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 
vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); } +// CHECK-LABEL: @vsat_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); } +// CHECK-LABEL: @vsat_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); } +// CHECK-LABEL: @vsat_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); } +// CHECK-LABEL: @vadda_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); } +// CHECK-LABEL: @vadda_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); } +// CHECK-LABEL: @vadda_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); } +// CHECK-LABEL: @vadda_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); } +// CHECK-LABEL: @vsadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); } +// CHECK-LABEL: @vsadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); } +// CHECK-LABEL: @vsadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); } +// CHECK-LABEL: @vsadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); } +// CHECK-LABEL: @vsadd_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); } +// CHECK-LABEL: @vsadd_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x 
i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); } +// CHECK-LABEL: @vsadd_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); } +// CHECK-LABEL: @vsadd_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); } +// CHECK-LABEL: @vavg_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); } +// CHECK-LABEL: @vavg_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); } +// CHECK-LABEL: @vavg_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); } +// CHECK-LABEL: @vavg_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); } +// CHECK-LABEL: @vavg_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); } +// CHECK-LABEL: @vavg_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); } +// CHECK-LABEL: @vavg_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vavg_wu(v4u32 _1, v4u32 _2) { return __lsx_vavg_wu(_1, _2); } +// CHECK-LABEL: @vavg_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); } +// CHECK-LABEL: @vavgr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); } +// CHECK-LABEL: @vavgr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); } +// 
CHECK-LABEL: @vavgr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); } +// CHECK-LABEL: @vavgr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); } +// CHECK-LABEL: @vavgr_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); } +// CHECK-LABEL: @vavgr_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); } +// CHECK-LABEL: @vavgr_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); } +// CHECK-LABEL: @vavgr_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); } +// CHECK-LABEL: @vssub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); } +// CHECK-LABEL: @vssub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); } +// CHECK-LABEL: @vssub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); } +// CHECK-LABEL: @vssub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); } +// CHECK-LABEL: @vssub_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); } +// CHECK-LABEL: @vssub_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); } +// CHECK-LABEL: @vssub_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); } +// CHECK-LABEL: @vssub_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vssub_du(v2u64 _1, v2u64 _2) { return __lsx_vssub_du(_1, _2); } +// CHECK-LABEL: @vabsd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); } +// CHECK-LABEL: @vabsd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); } +// CHECK-LABEL: @vabsd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); } +// CHECK-LABEL: @vabsd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); } +// CHECK-LABEL: @vabsd_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); } +// CHECK-LABEL: @vabsd_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); } +// CHECK-LABEL: @vabsd_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return __lsx_vabsd_wu(_1, _2); } +// CHECK-LABEL: @vabsd_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); } +// CHECK-LABEL: @vmul_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); } +// CHECK-LABEL: @vmul_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); } +// CHECK-LABEL: @vmul_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 
vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); } +// CHECK-LABEL: @vmul_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); } +// CHECK-LABEL: @vmadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmadd_b(_1, _2, _3); +} +// CHECK-LABEL: @vmadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmadd_h(_1, _2, _3); +} +// CHECK-LABEL: @vmadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmadd_w(_1, _2, _3); +} +// CHECK-LABEL: @vmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmadd_d(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmsub_b(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmsub_h(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmsub_w(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmsub_d(_1, _2, _3); +} +// CHECK-LABEL: @vdiv_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); } +// CHECK-LABEL: @vdiv_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); } +// CHECK-LABEL: @vdiv_w( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); } +// CHECK-LABEL: @vdiv_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); } +// CHECK-LABEL: @vdiv_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); } +// CHECK-LABEL: @vdiv_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); } +// CHECK-LABEL: @vdiv_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); } +// CHECK-LABEL: @vdiv_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); } +// CHECK-LABEL: @vhaddw_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); } +// CHECK-LABEL: @vhaddw_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhaddw_w_h(_1, _2); } +// CHECK-LABEL: @vhaddw_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); } +// CHECK-LABEL: @vhaddw_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); } +// CHECK-LABEL: @vhaddw_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); } +// CHECK-LABEL: @vhaddw_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); } +// CHECK-LABEL: @vhsubw_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x 
i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); } +// CHECK-LABEL: @vhsubw_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); } +// CHECK-LABEL: @vhsubw_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); } +// CHECK-LABEL: @vhsubw_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); } +// CHECK-LABEL: @vhsubw_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); } +// CHECK-LABEL: @vhsubw_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); } +// CHECK-LABEL: @vmod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); } +// CHECK-LABEL: @vmod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); } +// CHECK-LABEL: @vmod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); } +// CHECK-LABEL: @vmod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); } +// CHECK-LABEL: @vmod_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); } +// CHECK-LABEL: @vmod_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); } +// CHECK-LABEL: @vmod_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 
x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); } +// CHECK-LABEL: @vmod_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); } +// CHECK-LABEL: @vreplve_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); } +// CHECK-LABEL: @vreplve_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); } +// CHECK-LABEL: @vreplve_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); } +// CHECK-LABEL: @vreplve_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); } +// CHECK-LABEL: @vreplvei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); } +// CHECK-LABEL: @vreplvei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vreplvei_h(v8i16 _1) { return __lsx_vreplvei_h(_1, 1); } +// CHECK-LABEL: @vreplvei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); } +// CHECK-LABEL: @vreplvei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); } +// CHECK-LABEL: @vpickev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); } +// CHECK-LABEL: @vpickev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); } +// CHECK-LABEL: @vpickev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); } +// CHECK-LABEL: @vpickev_d( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); } +// CHECK-LABEL: @vpickod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); } +// CHECK-LABEL: @vpickod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); } +// CHECK-LABEL: @vpickod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); } +// CHECK-LABEL: @vpickod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); } +// CHECK-LABEL: @vilvh_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); } +// CHECK-LABEL: @vilvh_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); } +// CHECK-LABEL: @vilvh_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); } +// CHECK-LABEL: @vilvh_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); } +// CHECK-LABEL: @vilvl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); } +// CHECK-LABEL: @vilvl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); } +// CHECK-LABEL: @vilvl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); } +// CHECK-LABEL: @vilvl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); } +// CHECK-LABEL: @vpackev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); } +// CHECK-LABEL: @vpackev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); } +// CHECK-LABEL: @vpackev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); } +// CHECK-LABEL: @vpackev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); } +// CHECK-LABEL: @vpackod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); } +// CHECK-LABEL: @vpackod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); } +// CHECK-LABEL: @vpackod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vpackod_w(v4i32 _1, v4i32 _2) { return __lsx_vpackod_w(_1, _2); } +// CHECK-LABEL: @vpackod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); } +// CHECK-LABEL: @vshuf_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vshuf_h(_1, _2, _3); +} +// CHECK-LABEL: @vshuf_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vshuf_w(_1, _2, _3); +} +// CHECK-LABEL: @vshuf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vshuf_d(_1, _2, _3); +} +// CHECK-LABEL: @vand_v( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); } +// CHECK-LABEL: @vandi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); } +// CHECK-LABEL: @vor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); } +// CHECK-LABEL: @vori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); } +// CHECK-LABEL: @vnor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); } +// CHECK-LABEL: @vnori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); } +// CHECK-LABEL: @vxor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, _2); } +// CHECK-LABEL: @vxori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); } +// CHECK-LABEL: @vbitsel_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { + return __lsx_vbitsel_v(_1, _2, _3); +} +// CHECK-LABEL: @vbitseli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); } +// CHECK-LABEL: @vshuf4i_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); } +// CHECK-LABEL: @vshuf4i_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); } +// CHECK-LABEL: @vshuf4i_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); } +// 
CHECK-LABEL: @vreplgr2vr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); }
+// CHECK-LABEL: @vreplgr2vr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); }
+// CHECK-LABEL: @vreplgr2vr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); }
+// CHECK-LABEL: @vreplgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); }
+// CHECK-LABEL: @vpcnt_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); }
+// CHECK-LABEL: @vpcnt_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); }
+// CHECK-LABEL: @vpcnt_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vpcnt_w(v4i32 _1) { return __lsx_vpcnt_w(_1); }
+// CHECK-LABEL: @vpcnt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); }
+// CHECK-LABEL: @vclo_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); }
+// CHECK-LABEL: @vclo_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); }
+// CHECK-LABEL: @vclo_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); }
+// CHECK-LABEL: @vclo_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); }
+// CHECK-LABEL: @vclz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); }
+// CHECK-LABEL: @vclz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); }
+// CHECK-LABEL: @vclz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); }
+// CHECK-LABEL: @vclz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); }
+// CHECK-LABEL: @vpickve2gr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i64 [[TMP0]]
+//
+long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i64 [[TMP0]]
+//
+unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); }
+// CHECK-LABEL: @vinsgr2vr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); }
+// CHECK-LABEL: @vinsgr2vr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); }
+// CHECK-LABEL: @vinsgr2vr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); }
+// CHECK-LABEL: @vinsgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[_1:%.*]], i64 1, i32 1)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); }
+// CHECK-LABEL: @vfadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); }
+// CHECK-LABEL: @vfadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); }
+// CHECK-LABEL: @vfsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); }
+// CHECK-LABEL: @vfsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); }
+// CHECK-LABEL: @vfmul_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmul_s(v4f32 _1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); }
+// CHECK-LABEL: @vfmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); }
+// CHECK-LABEL: @vfdiv_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); }
+// CHECK-LABEL: @vfdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, _2); }
+// CHECK-LABEL: @vfcvt_h_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); }
+// CHECK-LABEL: @vfcvt_s_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); }
+// CHECK-LABEL: @vfmin_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); }
+// CHECK-LABEL: @vfmin_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); }
+// CHECK-LABEL: @vfmina_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); }
+// CHECK-LABEL: @vfmina_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); }
+// CHECK-LABEL: @vfmax_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); }
+// CHECK-LABEL: @vfmax_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); }
+// CHECK-LABEL: @vfmaxa_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); }
+// CHECK-LABEL: @vfmaxa_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); }
+// CHECK-LABEL: @vfclass_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); }
+// CHECK-LABEL: @vfclass_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); }
+// CHECK-LABEL: @vfsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); }
+// CHECK-LABEL: @vfsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); }
+// CHECK-LABEL: @vfrecip_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); }
+// CHECK-LABEL: @vfrecip_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); }
+// CHECK-LABEL: @vfrint_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); }
+// CHECK-LABEL: @vfrint_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); }
+// CHECK-LABEL: @vfrsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); }
+// CHECK-LABEL: @vfrsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); }
+// CHECK-LABEL: @vflogb_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); }
+// CHECK-LABEL: @vflogb_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); }
+// CHECK-LABEL: @vfcvth_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); }
+// CHECK-LABEL: @vfcvth_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); }
+// CHECK-LABEL: @vfcvtl_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); }
+// CHECK-LABEL: @vfcvtl_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); }
+// CHECK-LABEL: @vftint_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); }
+// CHECK-LABEL: @vftint_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); }
+// CHECK-LABEL: @vftint_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); }
+// CHECK-LABEL: @vftint_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); }
+// CHECK-LABEL: @vftintrz_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); }
+// CHECK-LABEL: @vftintrz_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); }
+// CHECK-LABEL: @vftintrz_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); }
+// CHECK-LABEL: @vftintrz_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); }
+// CHECK-LABEL: @vffint_s_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); }
+// CHECK-LABEL: @vffint_d_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); }
+// CHECK-LABEL: @vffint_s_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); }
+// CHECK-LABEL: @vffint_d_lu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); }
+// CHECK-LABEL: @vandn_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); }
+// CHECK-LABEL: @vneg_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); }
+// CHECK-LABEL: @vneg_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); }
+// CHECK-LABEL: @vneg_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); }
+// CHECK-LABEL: @vneg_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); }
+// CHECK-LABEL: @vmuh_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __lsx_vmuh_b(_1, _2); }
+// CHECK-LABEL: @vmuh_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); }
+// CHECK-LABEL: @vmuh_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); }
+// CHECK-LABEL: @vmuh_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); }
+// CHECK-LABEL: @vmuh_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); }
+// CHECK-LABEL: @vmuh_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); }
+// CHECK-LABEL: @vmuh_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); }
+// CHECK-LABEL: @vmuh_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); }
+// CHECK-LABEL: @vsllwil_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); }
+// CHECK-LABEL: @vsllwil_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); }
+// CHECK-LABEL: @vsllwil_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); }
+// CHECK-LABEL: @vsllwil_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); }
+// CHECK-LABEL: @vsllwil_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); }
+// CHECK-LABEL: @vsllwil_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); }
+// CHECK-LABEL: @vsran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); }
+// CHECK-LABEL: @vsran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsran_h_w(_1, _2); }
+// CHECK-LABEL: @vsran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); }
+// CHECK-LABEL: @vssran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); }
+// CHECK-LABEL: @vssran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); }
+// CHECK-LABEL: @vssran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); }
+// CHECK-LABEL: @vssran_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); }
+// CHECK-LABEL: @vssran_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); }
+// CHECK-LABEL: @vssran_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssran_wu_d(_1, _2); }
+// CHECK-LABEL: @vsrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); }
+// CHECK-LABEL: @vsrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); }
+// CHECK-LABEL: @vsrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrarn_w_d(_1, _2); }
+// CHECK-LABEL: @vssrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); }
+// CHECK-LABEL: @vssrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); }
+// CHECK-LABEL: @vssrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); }
+// CHECK-LABEL: @vssrarn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrarn_bu_h(_1, _2); }
+// CHECK-LABEL: @vssrarn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); }
+// CHECK-LABEL: @vssrarn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); }
+// CHECK-LABEL: @vsrln_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); }
+// CHECK-LABEL: @vsrln_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrln_h_w(_1, _2); }
+// CHECK-LABEL: @vsrln_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); }
+// CHECK-LABEL: @vssrln_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); }
+// CHECK-LABEL: @vssrln_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrln_hu_w(_1, _2); }
+// CHECK-LABEL: @vssrln_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); }
+// CHECK-LABEL: @vsrlrn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); }
+// CHECK-LABEL: @vsrlrn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); }
+// CHECK-LABEL: @vsrlrn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); }
+// CHECK-LABEL: @vssrlrn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); }
+// CHECK-LABEL: @vssrlrn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); }
+// CHECK-LABEL: @vssrlrn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrlrn_wu_d(_1, _2); }
+// CHECK-LABEL: @vfrstpi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); }
+// CHECK-LABEL: @vfrstpi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); }
+// CHECK-LABEL: @vfrstp_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) {
+ return __lsx_vfrstp_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vfrstp_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) {
+ return __lsx_vfrstp_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vshuf4i_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); }
+// CHECK-LABEL: @vbsrl_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); }
+// CHECK-LABEL: @vbsll_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vbsll_v(v16i8 _1) { return __lsx_vbsll_v(_1, 1); }
+// CHECK-LABEL: @vextrins_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); }
+// CHECK-LABEL: @vextrins_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); }
+// CHECK-LABEL: @vextrins_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); }
+// CHECK-LABEL: @vextrins_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); }
+// CHECK-LABEL: @vmskltz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); }
+// CHECK-LABEL: @vmskltz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); }
+// CHECK-LABEL: @vmskltz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); }
+// CHECK-LABEL: @vmskltz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); }
+// CHECK-LABEL: @vsigncov_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); }
+// CHECK-LABEL: @vsigncov_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); }
+// CHECK-LABEL: @vsigncov_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); }
+// CHECK-LABEL: @vsigncov_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); }
+// CHECK-LABEL: @vfmadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+ return __lsx_vfmadd_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+ return __lsx_vfmadd_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vfmsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+ return __lsx_vfmsub_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+ return __lsx_vfmsub_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+ return __lsx_vfnmadd_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+ return __lsx_vfnmadd_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+ return __lsx_vfnmsub_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+ return __lsx_vfnmsub_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vftintrne_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); }
+// CHECK-LABEL: @vftintrne_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); }
+// CHECK-LABEL: @vftintrp_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); }
+// CHECK-LABEL: @vftintrp_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrp_l_d(v2f64 _1) { return __lsx_vftintrp_l_d(_1); }
+// CHECK-LABEL: @vftintrm_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); }
+// CHECK-LABEL: @vftintrm_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); }
+// CHECK-LABEL: @vftint_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); }
+// CHECK-LABEL: @vffint_s_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); }
+// CHECK-LABEL: @vftintrz_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); }
+// CHECK-LABEL: @vftintrp_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); }
+// CHECK-LABEL: @vftintrm_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); }
+// CHECK-LABEL: @vftintrne_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); }
+// CHECK-LABEL: @vftintl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); }
+// CHECK-LABEL: @vftinth_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); }
+// CHECK-LABEL: @vffinth_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); }
+// CHECK-LABEL: @vffintl_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); }
+// CHECK-LABEL: @vftintrzl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); }
+// CHECK-LABEL: @vftintrzh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); }
+// CHECK-LABEL: @vftintrpl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); }
+// CHECK-LABEL: @vftintrph_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); }
+// CHECK-LABEL: @vftintrml_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); }
+// CHECK-LABEL: @vftintrmh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); }
+// CHECK-LABEL: @vftintrnel_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); }
+// CHECK-LABEL: @vftintrneh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); }
+// CHECK-LABEL: @vfrintrne_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); }
+// CHECK-LABEL: @vfrintrne_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP1]]
+//
+v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); }
+// CHECK-LABEL: @vfrintrz_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); }
+// CHECK-LABEL: @vfrintrz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP1]]
+//
+v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); }
+// CHECK-LABEL: @vfrintrp_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+v4i32 vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); }
+// CHECK-LABEL: @vfrintrp_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP1]]
+//
+v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); }
+// CHECK-LABEL: @vfrintrm_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); }
+// CHECK-LABEL: @vfrintrm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP1]]
+//
+v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); }
+// CHECK-LABEL: @vstelm_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); }
+// CHECK-LABEL: @vstelm_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); }
+// CHECK-LABEL: @vstelm_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); }
+// CHECK-LABEL: @vstelm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); }
+// CHECK-LABEL: @vaddwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); }
+// CHECK-LABEL: @vaddwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); }
+// CHECK-LABEL: @vaddwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); }
+// CHECK-LABEL: @vaddwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); }
+// CHECK-LABEL: @vaddwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); }
+// CHECK-LABEL: @vaddwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); }
+// CHECK-LABEL: @vaddwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); }
+// CHECK-LABEL: @vaddwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); }
+// CHECK-LABEL: @vaddwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); }
+// CHECK-LABEL: @vaddwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); }
+// CHECK-LABEL: @vaddwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); }
+// CHECK-LABEL: @vaddwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); }
+// CHECK-LABEL: @vaddwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) {
+ return __lsx_vaddwev_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) {
+ return __lsx_vaddwev_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) {
+ return __lsx_vaddwev_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) {
+ return __lsx_vaddwod_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) {
+ return __lsx_vaddwod_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) {
+ return __lsx_vaddwod_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); }
+// CHECK-LABEL: @vsubwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); }
+// CHECK-LABEL: @vsubwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); }
+// CHECK-LABEL: @vsubwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); }
+// CHECK-LABEL: @vsubwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); }
+// CHECK-LABEL: @vsubwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); }
+// CHECK-LABEL: @vsubwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); }
+// CHECK-LABEL: @vsubwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); }
+// CHECK-LABEL: @vsubwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); }
+// CHECK-LABEL: @vsubwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); }
+// CHECK-LABEL: @vsubwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwod_w_hu(_1, _2); }
+// CHECK-LABEL: @vsubwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); }
+// CHECK-LABEL: @vaddwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); }
+// CHECK-LABEL: @vaddwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); }
+// CHECK-LABEL: @vaddwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); }
+// CHECK-LABEL: @vaddwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); }
+// CHECK-LABEL: @vsubwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwev_q_d(_1, _2); }
+// CHECK-LABEL: @vsubwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); }
+// CHECK-LABEL: @vsubwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); }
+// CHECK-LABEL: @vsubwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); }
+// CHECK-LABEL: @vaddwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) {
+ return __lsx_vaddwev_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) {
+ return __lsx_vaddwod_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwev_d_w(_1, _2); }
+// CHECK-LABEL: @vmulwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); }
+// CHECK-LABEL: @vmulwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); }
+// CHECK-LABEL: @vmulwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); }
+// CHECK-LABEL: @vmulwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); }
+// CHECK-LABEL: @vmulwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); }
+// CHECK-LABEL: @vmulwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); }
+// CHECK-LABEL: @vmulwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); }
+// CHECK-LABEL: @vmulwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); }
+// CHECK-LABEL: @vmulwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); }
+// CHECK-LABEL: @vmulwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); }
+// CHECK-LABEL: @vmulwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); }
+// CHECK-LABEL: @vmulwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) {
+ return __lsx_vmulwev_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) {
+ return __lsx_vmulwev_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) {
+ return __lsx_vmulwev_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) {
+ return __lsx_vmulwod_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) {
+ return __lsx_vmulwod_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) {
+ return __lsx_vmulwod_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); }
+// CHECK-LABEL: @vmulwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, _2); }
+// CHECK-LABEL: @vmulwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); }
+// CHECK-LABEL: @vmulwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); }
+// CHECK-LABEL: @vmulwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) {
+ return __lsx_vmulwev_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) {
+ return __lsx_vmulwod_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vhaddw_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); }
+// CHECK-LABEL: @vhaddw_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); }
+// CHECK-LABEL: @vhsubw_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); }
+// CHECK-LABEL: @vhsubw_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); }
+// CHECK-LABEL: @vmaddwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
+ return __lsx_vmaddwev_d_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32
vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmaddwev_w_h(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmaddwev_h_b(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __lsx_vmaddwev_d_wu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __lsx_vmaddwev_w_hu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __lsx_vmaddwev_h_bu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmaddwod_d_w(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmaddwod_w_h(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmaddwod_h_b(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __lsx_vmaddwod_d_wu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __lsx_vmaddwod_w_hu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __lsx_vmaddwod_h_bu(_1, _2, _3); +} 
+// CHECK-LABEL: @vmaddwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __lsx_vmaddwev_d_wu_w(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __lsx_vmaddwev_w_hu_h(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __lsx_vmaddwev_h_bu_b(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __lsx_vmaddwod_d_wu_w(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __lsx_vmaddwod_w_hu_h(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __lsx_vmaddwod_h_bu_b(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmaddwev_q_d(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmaddwod_q_d(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __lsx_vmaddwev_q_du(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __lsx_vmaddwod_q_du(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_q_du_d( 
+// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __lsx_vmaddwev_q_du_d(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __lsx_vmaddwod_q_du_d(_1, _2, _3); +} +// CHECK-LABEL: @vrotr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); } +// CHECK-LABEL: @vrotr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); } +// CHECK-LABEL: @vrotr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); } +// CHECK-LABEL: @vrotr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); } +// CHECK-LABEL: @vadd_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); } +// CHECK-LABEL: @vsub_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __lsx_vsub_q(_1, _2); } +// CHECK-LABEL: @vldrepl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); } +// CHECK-LABEL: @vldrepl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); } +// CHECK-LABEL: @vldrepl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); } +// CHECK-LABEL: @vldrepl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); } +// CHECK-LABEL: @vmskgez_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); } +// CHECK-LABEL: @vmsknz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); } +// CHECK-LABEL: @vexth_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); } +// CHECK-LABEL: @vexth_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); } +// CHECK-LABEL: @vexth_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); } +// CHECK-LABEL: @vexth_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); } +// CHECK-LABEL: @vexth_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); } +// CHECK-LABEL: @vexth_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); } +// CHECK-LABEL: @vexth_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); } +// CHECK-LABEL: @vexth_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vexth_qu_du(v2u64 _1) { return __lsx_vexth_qu_du(_1); } +// CHECK-LABEL: @vrotri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); } +// CHECK-LABEL: @vrotri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); } +// CHECK-LABEL: @vrotri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); } +// CHECK-LABEL: @vrotri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); } +// 
CHECK-LABEL: @vextl_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); } +// CHECK-LABEL: @vsrlni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); } +// CHECK-LABEL: @vsrlni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); } +// CHECK-LABEL: @vsrlni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); } +// CHECK-LABEL: @vsrlni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlni_d_q(_1, _2, 1); } +// CHECK-LABEL: @vsrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); } +// CHECK-LABEL: @vsrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); } +// CHECK-LABEL: @vsrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); } +// CHECK-LABEL: @vsrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); } +// CHECK-LABEL: @vssrlni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); } +// CHECK-LABEL: @vssrlni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); } +// CHECK-LABEL: @vssrlni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret 
<4 x i32> [[TMP0]] +// +v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); } +// CHECK-LABEL: @vssrlni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); } +// CHECK-LABEL: @vssrlni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @vssrlni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @vssrlni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @vssrlni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); } +// CHECK-LABEL: @vssrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); } +// CHECK-LABEL: @vssrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); } +// CHECK-LABEL: @vssrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); } +// CHECK-LABEL: @vssrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); } +// CHECK-LABEL: @vssrlrni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { + return __lsx_vssrlrni_bu_h(_1, _2, 1); +} +// CHECK-LABEL: @vssrlrni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { + return 
__lsx_vssrlrni_hu_w(_1, _2, 1); +} +// CHECK-LABEL: @vssrlrni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { + return __lsx_vssrlrni_wu_d(_1, _2, 1); +} +// CHECK-LABEL: @vssrlrni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { + return __lsx_vssrlrni_du_q(_1, _2, 1); +} +// CHECK-LABEL: @vsrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); } +// CHECK-LABEL: @vsrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 1); } +// CHECK-LABEL: @vsrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @vsrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @vsrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @vsrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @vsrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @vsrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @vssrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); } +// CHECK-LABEL: @vssrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); } +// CHECK-LABEL: @vssrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @vssrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @vssrani_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); } +// CHECK-LABEL: @vssrani_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); } +// CHECK-LABEL: @vssrani_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); } +// CHECK-LABEL: @vssrani_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); } +// CHECK-LABEL: @vssrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @vssrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @vssrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @vssrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @vssrarni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) 
+// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { + return __lsx_vssrarni_bu_h(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { + return __lsx_vssrarni_hu_w(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { + return __lsx_vssrarni_wu_d(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { + return __lsx_vssrarni_du_q(_1, _2, 1); +} +// CHECK-LABEL: @vpermi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); } +// CHECK-LABEL: @vld( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vld(void *_1) { return __lsx_vld(_1, 1); } +// CHECK-LABEL: @vst( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) +// CHECK-NEXT: ret void +// +void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); } +// CHECK-LABEL: @vssrlrn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); } +// CHECK-LABEL: @vssrlrn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); } +// CHECK-LABEL: @vssrlrn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); } +// CHECK-LABEL: @vssrln_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); } +// CHECK-LABEL: @vssrln_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); } +// CHECK-LABEL: @vssrln_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// 
CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); } +// CHECK-LABEL: @vorn_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __lsx_vorn_v(_1, _2); } +// CHECK-LABEL: @vldi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vldi() { return __lsx_vldi(1); } +// CHECK-LABEL: @vshuf_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vshuf_b(_1, _2, _3); +} +// CHECK-LABEL: @vldx( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); } +// CHECK-LABEL: @vstx( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) +// CHECK-NEXT: ret void +// +void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); } +// CHECK-LABEL: @vextl_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); } +// CHECK-LABEL: @bnz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); } +// CHECK-LABEL: @bnz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); } +// CHECK-LABEL: @bnz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int bnz_h(v8u16 _1) { return __lsx_bnz_h(_1); } +// CHECK-LABEL: @bnz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); } +// CHECK-LABEL: @bnz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); } +// CHECK-LABEL: @bz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int bz_b(v16u8 _1) { return __lsx_bz_b(_1); } +// CHECK-LABEL: @bz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int bz_d(v2u64 _1) { return __lsx_bz_d(_1); } +// CHECK-LABEL: @bz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int bz_h(v8u16 _1) { return __lsx_bz_h(_1); } +// CHECK-LABEL: @bz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int bz_v(v16u8 _1) { return __lsx_bz_v(_1); } +// CHECK-LABEL: @bz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int bz_w(v4u32 _1) { return __lsx_bz_w(_1); } +// CHECK-LABEL: @vfcmp_caf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); } +// CHECK-LABEL: @vfcmp_caf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); } +// CHECK-LABEL: @vfcmp_ceq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); } +// CHECK-LABEL: @vfcmp_ceq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); } +// CHECK-LABEL: @vfcmp_clt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); } +// CHECK-LABEL: @vfcmp_clt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_clt_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cun_s(v4f32 _1, 
v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); } +// CHECK-LABEL: @vfcmp_saf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); } +// CHECK-LABEL: @vfcmp_saf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); } +// CHECK-LABEL: @vfcmp_seq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); } +// CHECK-LABEL: @vfcmp_seq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); } +// CHECK-LABEL: @vfcmp_slt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); } +// CHECK-LABEL: @vfcmp_slt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 
x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); } +// CHECK-LABEL: @vrepli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vrepli_b() { return __lsx_vrepli_b(1); } +// CHECK-LABEL: @vrepli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vrepli_d() { return __lsx_vrepli_d(1); } +// CHECK-LABEL: @vrepli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vrepli_h() { return __lsx_vrepli_h(1); } +// CHECK-LABEL: @vrepli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vrepli_w() { return __lsx_vrepli_w(1); } diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c new file mode 100644 index 00000000000000..3fc5f73f11934e --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c @@ -0,0 +1,1382 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s + +typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); +typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); +typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); +typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); +typedef short v8i16 __attribute__((vector_size(16), aligned(16))); +typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); +typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); +typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); +typedef int v4i32 __attribute__((vector_size(16), aligned(16))); +typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); +typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); +typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); +typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); +typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); +typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); +typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); +typedef float v4f32 __attribute__((vector_size(16), aligned(16))); +typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); +typedef double v2f64 __attribute__((vector_size(16), aligned(16))); +typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); + +typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); +typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); +typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); + +v16i8 vslli_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} + return res; +} + +v8i16 vslli_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} + return res; +} + +v4i32 vslli_w(v4i32 _1, int var) { + v4i32 res = 
__builtin_lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} + return res; +} + +v2i64 vslli_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} + return res; +} + +v16i8 vsrai_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} + return res; +} + +v8i16 vsrai_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} + return res; +} + +v4i32 vsrai_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} + return res; +} + +v2i64 vsrai_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} + return res; +} + +v16i8 vsrari_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} + return res; +} + +v8i16 vsrari_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} + return res; +} + +v4i32 vsrari_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrari_w(_1, 32); // 
expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} + return res; +} + +v2i64 vsrari_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} + return res; +} + +v16i8 vsrli_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} + return res; +} + +v8i16 vsrli_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} + return res; +} + +v4i32 vsrli_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} + return res; +} + +v2i64 vsrli_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} + return res; +} + +v16i8 vsrlri_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} + return res; +} + +v8i16 vsrlri_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} + return res; +} + +v4i32 vsrlri_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrlri_w(_1, var); // expected-error {{argument to 
'__builtin_lsx_vsrlri_w' must be a constant integer}} + return res; +} + +v2i64 vsrlri_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} + return res; +} + +v16u8 vbitclri_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} + return res; +} + +v8u16 vbitclri_h(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} + return res; +} + +v4u32 vbitclri_w(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} + return res; +} + +v2u64 vbitclri_d(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} + return res; +} + +v16u8 vbitseti_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} + return res; +} + +v8u16 vbitseti_h(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} + return res; +} + +v4u32 vbitseti_w(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} + 
return res; +} + +v2u64 vbitseti_d(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} + return res; +} + +v16u8 vbitrevi_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} + return res; +} + +v8u16 vbitrevi_h(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} + return res; +} + +v4u32 vbitrevi_w(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} + return res; +} + +v2u64 vbitrevi_d(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} + return res; +} + +v16i8 vaddi_bu(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} + return res; +} + +v8i16 vaddi_hu(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} + return res; +} + +v4i32 vaddi_wu(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} + return res; +} + +v2i64 vaddi_du(v2i64 _1, int var) { + v2i64 res = 
__builtin_lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} + return res; +} + +v16i8 vsubi_bu(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} + return res; +} + +v8i16 vsubi_hu(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} + return res; +} + +v4i32 vsubi_wu(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} + return res; +} + +v2i64 vsubi_du(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} + return res; +} + +v16i8 vmaxi_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} + return res; +} + +v8i16 vmaxi_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} + return res; +} + +v4i32 vmaxi_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} + return res; +} + +v2i64 vmaxi_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_d(_1, 
16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} + return res; +} + +v16u8 vmaxi_bu(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} + return res; +} + +v8u16 vmaxi_hu(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} + return res; +} + +v4u32 vmaxi_wu(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} + return res; +} + +v2u64 vmaxi_du(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} + return res; +} + +v16i8 vmini_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}} + return res; +} + +v8i16 vmini_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}} + return res; +} + +v4i32 vmini_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} + return res; +} + +v2i64 vmini_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_d(_1, var); // expected-error
{{argument to '__builtin_lsx_vmini_d' must be a constant integer}} + return res; +} + +v16u8 vmini_bu(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} + return res; +} + +v8u16 vmini_hu(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} + return res; +} + +v4u32 vmini_wu(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} + return res; +} + +v2u64 vmini_du(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} + return res; +} + +v16i8 vseqi_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} + return res; +} + +v8i16 vseqi_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} + return res; +} + +v4i32 vseqi_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} + return res; +} + +v2i64 vseqi_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} + return res; +} + +v16i8 vslti_b(v16i8 _1, int var) { + v16i8 res = 
__builtin_lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} + return res; +} + +v8i16 vslti_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} + return res; +} + +v4i32 vslti_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} + return res; +} + +v2i64 vslti_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} + return res; +} + +v16i8 vslti_bu(v16u8 _1, int var) { + v16i8 res = __builtin_lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} + return res; +} + +v8i16 vslti_hu(v8u16 _1, int var) { + v8i16 res = __builtin_lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} + return res; +} + +v4i32 vslti_wu(v4u32 _1, int var) { + v4i32 res = __builtin_lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} + return res; +} + +v2i64 vslti_du(v2u64 _1, int var) { + v2i64 res = __builtin_lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} + return res; +} + +v16i8 vslei_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_b(_1, 16); 
// expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} + return res; +} + +v8i16 vslei_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} + return res; +} + +v4i32 vslei_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} + return res; +} + +v2i64 vslei_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} + return res; +} + +v16i8 vslei_bu(v16u8 _1, int var) { + v16i8 res = __builtin_lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} + return res; +} + +v8i16 vslei_hu(v8u16 _1, int var) { + v8i16 res = __builtin_lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} + return res; +} + +v4i32 vslei_wu(v4u32 _1, int var) { + v4i32 res = __builtin_lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} + return res; +} + +v2i64 vslei_du(v2u64 _1, int var) { + v2i64 res = __builtin_lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} + return res; +} + +v16i8 vsat_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsat_b(_1, var); // expected-error {{argument to 
'__builtin_lsx_vsat_b' must be a constant integer}} + return res; +} + +v8i16 vsat_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} + return res; +} + +v4i32 vsat_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} + return res; +} + +v2i64 vsat_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} + return res; +} + +v16u8 vsat_bu(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} + return res; +} + +v8u16 vsat_hu(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} + return res; +} + +v4u32 vsat_wu(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} + return res; +} + +v2u64 vsat_du(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} + return res; +} + +v16i8 vreplvei_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} + return res; +} + +v8i16 vreplvei_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vreplvei_h(_1, -1); // 
expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} + return res; +} + +v4i32 vreplvei_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} + return res; +} + +v2i64 vreplvei_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} + return res; +} + +v16u8 vandi_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} + return res; +} + +v16u8 vori_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} + return res; +} + +v16u8 vnori_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} + return res; +} + +v16u8 vxori_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} + return res; +} + +v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { + v16u8 res = __builtin_lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} + return res; +} + +v16i8 vshuf4i_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 
255]}} + res |= __builtin_lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} + return res; +} + +v8i16 vshuf4i_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} + return res; +} + +v4i32 vshuf4i_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} + return res; +} + +int vpickve2gr_b(v16i8 _1, int var) { + int res = __builtin_lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} + return res; +} + +int vpickve2gr_h(v8i16 _1, int var) { + int res = __builtin_lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} + return res; +} + +int vpickve2gr_w(v4i32 _1, int var) { + int res = __builtin_lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} + return res; +} + +long vpickve2gr_d(v2i64 _1, int var) { + long res = __builtin_lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_bu(v16i8 _1, int var) { + unsigned int res = __builtin_lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_hu(v8i16 _1, int var) { + unsigned int res = __builtin_lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside 
the valid range [0, 7]}} + res |= __builtin_lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_wu(v4i32 _1, int var) { + unsigned int res = __builtin_lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} + return res; +} + +unsigned long int vpickve2gr_du(v2i64 _1, int var) { + unsigned long int res = __builtin_lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} + return res; +} + +v16i8 vinsgr2vr_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} + return res; +} + +v8i16 vinsgr2vr_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} + return res; +} + +v4i32 vinsgr2vr_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} + return res; +} + +v2i64 vinsgr2vr_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} + return res; +} + +v8i16 vsllwil_h_b(v16i8 _1, int var) { + v8i16 res = __builtin_lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} + return res; +} + +v4i32 vsllwil_w_h(v8i16 _1, int var) { + v4i32 res = 
__builtin_lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} + return res; +} + +v2i64 vsllwil_d_w(v4i32 _1, int var) { + v2i64 res = __builtin_lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} + return res; +} + +v8u16 vsllwil_hu_bu(v16u8 _1, int var) { + v8u16 res = __builtin_lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} + return res; +} + +v4u32 vsllwil_wu_hu(v8u16 _1, int var) { + v4u32 res = __builtin_lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} + return res; +} + +v2u64 vsllwil_du_wu(v4u32 _1, int var) { + v2u64 res = __builtin_lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} + return res; +} + +v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} + return res; +} + +v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} + return res; +} + +v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} + return 
res; +} + +v16i8 vbsrl_v(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} + return res; +} + +v16i8 vbsll_v(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} + return res; +} + +v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} + return res; +} + +v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} + return res; +} + +v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} + return res; +} + +v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} + return res; +} + +void vstelm_b_idx(v16i8 _1, void *_2, int var) { + __builtin_lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + __builtin_lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + __builtin_lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +} + +void vstelm_h_idx(v8i16 _1, void *_2, int var) { + __builtin_lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + __builtin_lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + __builtin_lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} 
+} + +void vstelm_w_idx(v4i32 _1, void *_2, int var) { + __builtin_lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + __builtin_lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + __builtin_lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +} + +void vstelm_d_idx(v2i64 _1, void *_2, int var) { + __builtin_lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + __builtin_lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + __builtin_lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} +} + +void vstelm_b(v16i8 _1, void *_2, int var) { + __builtin_lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} + __builtin_lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} + __builtin_lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +} + +void vstelm_h(v8i16 _1, void *_2, int var) { + __builtin_lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} + __builtin_lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} + __builtin_lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} +} + +void vstelm_w(v4i32 _1, void *_2, int var) { + __builtin_lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} + __builtin_lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} + __builtin_lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +} + +void vstelm_d(v2i64 _1, void *_2, int var) { + __builtin_lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} + __builtin_lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} + __builtin_lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} +} + +v16i8 vldrepl_b(void *_1, int var) { + v16i8 res = __builtin_lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __builtin_lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __builtin_lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} + return res; +} + +v8i16 vldrepl_h(void *_1, int var) { + v8i16 res = __builtin_lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} + res |= __builtin_lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} + res |= __builtin_lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} + return res; +} + +v4i32 vldrepl_w(void *_1, int var) { + v4i32 res = __builtin_lsx_vldrepl_w(_1, -2052); // expected-error 
{{argument value -2052 is outside the valid range [-2048, 2044]}}
+  res |= __builtin_lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}}
+  res |= __builtin_lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}}
+  return res;
+}
+
+v2i64 vldrepl_d(void *_1, int var) {
+  v2i64 res = __builtin_lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}}
+  res |= __builtin_lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}}
+  res |= __builtin_lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}}
+  return res;
+}
+
+v16i8 vrotri_b(v16i8 _1, int var) {
+  v16i8 res = __builtin_lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __builtin_lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __builtin_lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}}
+  return res;
+}
+
+v8i16 vrotri_h(v8i16 _1, int var) {
+  v8i16 res = __builtin_lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}}
+  return res;
+}
+
+v4i32 vrotri_w(v4i32 _1, int var) {
+  v4i32 res = __builtin_lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}}
+  return res;
+}
+
+v2i64 vrotri_d(v2i64 _1, int var) {
+  v2i64 res = __builtin_lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}}
+  return res;
+}
+
+v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __builtin_lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __builtin_lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __builtin_lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __builtin_lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __builtin_lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __builtin_lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __builtin_lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __builtin_lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __builtin_lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __builtin_lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __builtin_lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __builtin_lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) {
+  v16u8 res = __builtin_lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}}
+  return res;
+}
+
+v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) {
+  v8u16 res = __builtin_lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}}
+  return res;
+}
+
+v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) {
+  v4u32 res = __builtin_lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}}
+  return res;
+}
+
+v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) {
+  v2u64 res = __builtin_lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __builtin_lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __builtin_lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __builtin_lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __builtin_lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) {
+  v16u8 res = __builtin_lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}}
+  return res;
+}
+
+v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) {
+  v8u16 res = __builtin_lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}}
+  return res;
+}
+
+v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) {
+  v4u32 res = __builtin_lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}}
+  return res;
+}
+
+v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) {
+  v2u64 res = __builtin_lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __builtin_lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __builtin_lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __builtin_lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __builtin_lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __builtin_lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __builtin_lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __builtin_lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __builtin_lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __builtin_lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __builtin_lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __builtin_lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __builtin_lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}}
+  return res;
+}
+
+v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) {
+  v16u8 res = __builtin_lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}}
+  return res;
+}
+
+v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) {
+  v8u16 res = __builtin_lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}}
+  return res;
+}
+
+v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) {
+  v4u32 res = __builtin_lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}}
+  return res;
+}
+
+v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) {
+  v2u64 res = __builtin_lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __builtin_lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __builtin_lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __builtin_lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __builtin_lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) {
+  v16u8 res = __builtin_lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}}
+  return res;
+}
+
+v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) {
+  v8u16 res = __builtin_lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}}
+  return res;
+}
+
+v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) {
+  v4u32 res = __builtin_lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}}
+  return res;
+}
+
+v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) {
+  v2u64 res = __builtin_lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __builtin_lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}}
+  return res;
+}
+
+v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __builtin_lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __builtin_lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __builtin_lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}}
+  return res;
+}
+
+v16i8 vld(void *_1, int var) {
+  v16i8 res = __builtin_lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
+  res |= __builtin_lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
+  res |= __builtin_lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}}
+  return res;
+}
+
+void vst(v16i8 _1, void *_2, int var) {
+  __builtin_lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
+  __builtin_lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
+  __builtin_lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}}
+}
+
+v2i64 vldi(int var) {
+  v2i64 res = __builtin_lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}}
+  res |= __builtin_lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}}
+  res |= __builtin_lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}}
+  return res;
+}
+
+v16i8 vrepli_b(int var) {
+  v16i8 res = __builtin_lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __builtin_lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __builtin_lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}}
+  return res;
+}
+
+v2i64 vrepli_d(int var) {
+  v2i64 res = __builtin_lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __builtin_lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __builtin_lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}}
+  return res;
+}
+
+v8i16 vrepli_h(int var) {
+  v8i16 res = __builtin_lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __builtin_lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __builtin_lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}}
+  return res;
+}
+
+v4i32 vrepli_w(int var) {
+  v4i32 res = __builtin_lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __builtin_lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __builtin_lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}}
+  return res;
+}
diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin.c b/clang/test/CodeGen/LoongArch/lsx/builtin.c
new file mode 100644
index 00000000000000..ef5a390e1838c8
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/lsx/builtin.c
@@ -0,0 +1,5193 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s
+
+typedef signed char v16i8 __attribute__ ((vector_size(16), aligned(16)));
+typedef signed char v16i8_b __attribute__ ((vector_size(16), aligned(1)));
+typedef unsigned char v16u8 __attribute__ ((vector_size(16), aligned(16)));
+typedef unsigned char v16u8_b __attribute__ ((vector_size(16), aligned(1)));
+typedef short v8i16 __attribute__ ((vector_size(16), aligned(16)));
+typedef short v8i16_h __attribute__ ((vector_size(16), aligned(2)));
+typedef unsigned short v8u16 __attribute__ ((vector_size(16), aligned(16)));
+typedef unsigned short v8u16_h __attribute__ ((vector_size(16), aligned(2)));
+typedef int v4i32 __attribute__ ((vector_size(16), aligned(16)));
+typedef int v4i32_w __attribute__ ((vector_size(16), aligned(4)));
+typedef unsigned int v4u32 __attribute__ ((vector_size(16), aligned(16)));
+typedef unsigned int v4u32_w __attribute__ ((vector_size(16), aligned(4)));
+typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16)));
+typedef long long v2i64_d __attribute__ ((vector_size(16), aligned(8)));
+typedef unsigned long long v2u64 __attribute__ ((vector_size(16), aligned(16)));
+typedef unsigned long long v2u64_d __attribute__ ((vector_size(16), aligned(8)));
+typedef float v4f32 __attribute__ ((vector_size(16), aligned(16)));
+typedef float v4f32_w __attribute__ ((vector_size(16), aligned(4)));
+typedef double v2f64 __attribute__ ((vector_size(16), aligned(16)));
+typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8)));
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+
+// CHECK-LABEL: @vsll_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); }
+// CHECK-LABEL: @vsll_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); }
+// CHECK-LABEL: @vsll_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsll_w(_1, _2); }
+// CHECK-LABEL: @vsll_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); }
+// CHECK-LABEL: @vslli_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); }
+// CHECK-LABEL: @vslli_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); }
+// CHECK-LABEL: @vslli_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); }
+// CHECK-LABEL: @vslli_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); }
+// CHECK-LABEL: @vsra_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); }
+// CHECK-LABEL: @vsra_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); }
+// CHECK-LABEL: @vsra_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); }
+// CHECK-LABEL: @vsra_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); }
+// CHECK-LABEL: @vsrai_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); }
+// CHECK-LABEL: @vsrai_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrai_h(v8i16 _1) { return __builtin_lsx_vsrai_h(_1, 1); }
+// CHECK-LABEL: @vsrai_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); }
+// CHECK-LABEL: @vsrai_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); }
+// CHECK-LABEL: @vsrar_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrar_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsrar_b(_1, _2);
+}
+// CHECK-LABEL: @vsrar_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrar_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrar_h(_1, _2);
+}
+// CHECK-LABEL: @vsrar_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrar_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrar_w(_1, _2);
+}
+// CHECK-LABEL: @vsrar_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrar_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrar_d(_1, _2);
+}
+// CHECK-LABEL: @vsrari_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); }
+// CHECK-LABEL: @vsrari_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrari_h(v8i16 _1) { return __builtin_lsx_vsrari_h(_1, 1); }
+// CHECK-LABEL: @vsrari_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); }
+// CHECK-LABEL: @vsrari_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); }
+// CHECK-LABEL: @vsrl_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); }
+// CHECK-LABEL: @vsrl_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); }
+// CHECK-LABEL: @vsrl_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); }
+// CHECK-LABEL: @vsrl_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsrl_d(_1, _2); }
+// CHECK-LABEL: @vsrli_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); }
+// CHECK-LABEL: @vsrli_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); }
+// CHECK-LABEL: @vsrli_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); }
+// CHECK-LABEL: @vsrli_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); }
+// CHECK-LABEL: @vsrlr_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrlr_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsrlr_b(_1, _2);
+}
+// CHECK-LABEL: @vsrlr_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrlr_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrlr_h(_1, _2);
+}
+// CHECK-LABEL: @vsrlr_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrlr_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrlr_w(_1, _2);
+}
+// CHECK-LABEL: @vsrlr_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrlr_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrlr_d(_1, _2);
+}
+// CHECK-LABEL: @vsrlri_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); }
+// CHECK-LABEL: @vsrlri_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); }
+// CHECK-LABEL: @vsrlri_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); }
+// CHECK-LABEL: @vsrlri_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); }
+// CHECK-LABEL: @vbitclr_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vbitclr_b(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vbitclr_b(_1, _2);
+}
+// CHECK-LABEL: @vbitclr_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vbitclr_h(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vbitclr_h(_1, _2);
+}
+// CHECK-LABEL: @vbitclr_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vbitclr_w(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vbitclr_w(_1, _2);
+}
+// CHECK-LABEL: @vbitclr_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vbitclr_d(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vbitclr_d(_1, _2);
+}
+// CHECK-LABEL: @vbitclri_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); }
+// CHECK-LABEL: @vbitclri_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vbitclri_h(v8u16 _1) { return __builtin_lsx_vbitclri_h(_1, 1); }
+// CHECK-LABEL: @vbitclri_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); }
+// CHECK-LABEL: @vbitclri_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); }
+// CHECK-LABEL: @vbitset_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vbitset_b(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vbitset_b(_1, _2);
+}
+// CHECK-LABEL: @vbitset_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vbitset_h(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vbitset_h(_1, _2);
+}
+// CHECK-LABEL: @vbitset_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vbitset_w(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vbitset_w(_1, _2);
+}
+// CHECK-LABEL: @vbitset_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vbitset_d(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vbitset_d(_1, _2);
+}
+// CHECK-LABEL: @vbitseti_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); }
+// CHECK-LABEL: @vbitseti_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); }
+// CHECK-LABEL: @vbitseti_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); }
+// CHECK-LABEL: @vbitseti_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); }
+// CHECK-LABEL: @vbitrev_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vbitrev_b(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vbitrev_b(_1, _2);
+}
+// CHECK-LABEL: @vbitrev_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vbitrev_h(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vbitrev_h(_1, _2);
+}
+// CHECK-LABEL: @vbitrev_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vbitrev_w(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vbitrev_w(_1, _2);
+}
+// CHECK-LABEL: @vbitrev_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vbitrev_d(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vbitrev_d(_1, _2);
+}
+// CHECK-LABEL: @vbitrevi_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); }
+// CHECK-LABEL: @vbitrevi_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); }
+// CHECK-LABEL: @vbitrevi_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); }
+// CHECK-LABEL: @vbitrevi_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); }
+// CHECK-LABEL: @vadd_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); }
+// CHECK-LABEL: @vadd_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); }
+// CHECK-LABEL: @vadd_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); }
+// CHECK-LABEL: @vadd_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); }
+// CHECK-LABEL: @vaddi_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); }
+// CHECK-LABEL: @vaddi_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); }
+// CHECK-LABEL: @vaddi_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); }
+// CHECK-LABEL: @vaddi_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); }
+// CHECK-LABEL: @vsub_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); }
+// CHECK-LABEL: @vsub_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); }
+// CHECK-LABEL: @vsub_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); }
+// CHECK-LABEL: @vsub_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); }
+// CHECK-LABEL: @vsubi_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); }
+// CHECK-LABEL: @vsubi_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); }
+// CHECK-LABEL: @vsubi_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); }
+// CHECK-LABEL: @vsubi_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); }
+// CHECK-LABEL: @vmax_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); }
+// CHECK-LABEL: @vmax_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); }
+// CHECK-LABEL: @vmax_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); }
+// CHECK-LABEL: @vmax_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, _2); }
+// CHECK-LABEL: @vmaxi_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); }
+// CHECK-LABEL: @vmaxi_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); }
+// CHECK-LABEL: @vmaxi_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); }
+// CHECK-LABEL: @vmaxi_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); }
+// CHECK-LABEL: @vmax_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vmax_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vmax_bu(_1, _2);
+}
+// CHECK-LABEL: @vmax_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vmax_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vmax_hu(_1, _2);
+}
+// CHECK-LABEL: @vmax_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vmax_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vmax_wu(_1, _2);
+}
+// CHECK-LABEL: @vmax_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vmax_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vmax_du(_1, _2);
+}
+// CHECK-LABEL: @vmaxi_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); }
+// CHECK-LABEL: @vmaxi_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); }
+// CHECK-LABEL: @vmaxi_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); }
+// CHECK-LABEL: @vmaxi_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); }
+// CHECK-LABEL: @vmin_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); }
+// CHECK-LABEL: @vmin_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); }
+// CHECK-LABEL: @vmin_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); }
+// CHECK-LABEL: @vmin_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); }
+// CHECK-LABEL: @vmini_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); }
+// CHECK-LABEL: @vmini_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); }
+// CHECK-LABEL: @vmini_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); }
+// CHECK-LABEL: @vmini_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); }
+// CHECK-LABEL: @vmin_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vmin_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vmin_bu(_1, _2);
+}
+// CHECK-LABEL: @vmin_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vmin_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vmin_hu(_1, _2);
+}
+// CHECK-LABEL: @vmin_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vmin_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vmin_wu(_1, _2);
+}
+// CHECK-LABEL: @vmin_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vmin_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vmin_du(_1, _2);
+}
+// CHECK-LABEL: @vmini_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); }
+// CHECK-LABEL: @vmini_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); }
+// CHECK-LABEL: @vmini_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); }
+// CHECK-LABEL: @vmini_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); }
+// CHECK-LABEL: @vseq_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); }
+// CHECK-LABEL: @vseq_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); }
+// CHECK-LABEL: @vseq_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); }
+// CHECK-LABEL: @vseq_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); }
+// CHECK-LABEL: @vseqi_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); }
+// CHECK-LABEL: @vseqi_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); }
+// CHECK-LABEL: @vseqi_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); }
+// CHECK-LABEL: @vseqi_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); }
+// CHECK-LABEL: @vslti_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); }
+// CHECK-LABEL: @vslt_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); }
+// CHECK-LABEL: @vslt_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); }
+// CHECK-LABEL: @vslt_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); }
+// CHECK-LABEL: @vslt_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); }
+// CHECK-LABEL: @vslti_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); }
+// CHECK-LABEL: @vslti_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); }
+// CHECK-LABEL: @vslti_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); }
+// CHECK-LABEL: @vslt_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vslt_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vslt_bu(_1, _2);
+}
+// CHECK-LABEL: @vslt_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vslt_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vslt_hu(_1, _2);
+}
+// CHECK-LABEL: @vslt_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x
i32> [[TMP0]] +// +v4i32 vslt_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vslt_wu(_1, _2); +} +// CHECK-LABEL: @vslt_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vslt_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vslt_du(_1, _2); +} +// CHECK-LABEL: @vslti_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); } +// CHECK-LABEL: @vslti_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); } +// CHECK-LABEL: @vslti_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); } +// CHECK-LABEL: @vslti_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); } +// CHECK-LABEL: @vsle_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); } +// CHECK-LABEL: @vsle_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); } +// CHECK-LABEL: @vsle_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); } +// CHECK-LABEL: @vsle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); } +// CHECK-LABEL: @vslei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); } +// CHECK-LABEL: @vslei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); } +// CHECK-LABEL: @vslei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); } +// CHECK-LABEL: @vslei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vslei.d(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); } +// CHECK-LABEL: @vsle_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vsle_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsle_bu(_1, _2); +} +// CHECK-LABEL: @vsle_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vsle_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsle_hu(_1, _2); +} +// CHECK-LABEL: @vsle_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vsle_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsle_wu(_1, _2); +} +// CHECK-LABEL: @vsle_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vsle_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsle_du(_1, _2); +} +// CHECK-LABEL: @vslei_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); } +// CHECK-LABEL: @vslei_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); } +// CHECK-LABEL: @vslei_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); } +// CHECK-LABEL: @vslei_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); } +// CHECK-LABEL: @vsat_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); } +// CHECK-LABEL: @vsat_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); } +// CHECK-LABEL: @vsat_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); } +// CHECK-LABEL: @vsat_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); } +// CHECK-LABEL: @vsat_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); } +// CHECK-LABEL: @vsat_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 1); } +// CHECK-LABEL: @vsat_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); } +// CHECK-LABEL: @vsat_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); } +// CHECK-LABEL: @vadda_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vadda_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vadda_b(_1, _2); +} +// CHECK-LABEL: @vadda_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vadda_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vadda_h(_1, _2); +} +// CHECK-LABEL: @vadda_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vadda_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vadda_w(_1, _2); +} +// CHECK-LABEL: @vadda_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vadda_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vadda_d(_1, _2); +} +// CHECK-LABEL: @vsadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vsadd_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsadd_b(_1, _2); +} +// CHECK-LABEL: @vsadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vsadd_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsadd_h(_1, _2); +} +// CHECK-LABEL: @vsadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vsadd_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsadd_w(_1, _2); +} +// CHECK-LABEL: @vsadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vsadd_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsadd_d(_1, _2); +} +// CHECK-LABEL: @vsadd_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// 
CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsadd_bu(_1, _2); +} +// CHECK-LABEL: @vsadd_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsadd_hu(_1, _2); +} +// CHECK-LABEL: @vsadd_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsadd_wu(_1, _2); +} +// CHECK-LABEL: @vsadd_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vsadd_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsadd_du(_1, _2); +} +// CHECK-LABEL: @vavg_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); } +// CHECK-LABEL: @vavg_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); } +// CHECK-LABEL: @vavg_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); } +// CHECK-LABEL: @vavg_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); } +// CHECK-LABEL: @vavg_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vavg_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vavg_bu(_1, _2); +} +// CHECK-LABEL: @vavg_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vavg_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vavg_hu(_1, _2); +} +// CHECK-LABEL: @vavg_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vavg_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vavg_wu(_1, _2); +} +// CHECK-LABEL: @vavg_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vavg_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vavg_du(_1, _2); +} +// CHECK-LABEL: @vavgr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret 
<16 x i8> [[TMP0]] +// +v16i8 vavgr_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vavgr_b(_1, _2); +} +// CHECK-LABEL: @vavgr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vavgr_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vavgr_h(_1, _2); +} +// CHECK-LABEL: @vavgr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vavgr_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vavgr_w(_1, _2); +} +// CHECK-LABEL: @vavgr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vavgr_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vavgr_d(_1, _2); +} +// CHECK-LABEL: @vavgr_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vavgr_bu(_1, _2); +} +// CHECK-LABEL: @vavgr_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vavgr_hu(_1, _2); +} +// CHECK-LABEL: @vavgr_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vavgr_wu(_1, _2); +} +// CHECK-LABEL: @vavgr_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vavgr_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vavgr_du(_1, _2); +} +// CHECK-LABEL: @vssub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vssub_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssub_b(_1, _2); +} +// CHECK-LABEL: @vssub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vssub_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssub_h(_1, _2); +} +// CHECK-LABEL: @vssub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vssub_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssub_w(_1, _2); +} +// CHECK-LABEL: @vssub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vssub_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssub_d(_1, _2); +} +// CHECK-LABEL: @vssub_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// 
CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vssub_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vssub_bu(_1, _2); +} +// CHECK-LABEL: @vssub_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vssub_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssub_hu(_1, _2); +} +// CHECK-LABEL: @vssub_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vssub_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssub_wu(_1, _2); +} +// CHECK-LABEL: @vssub_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vssub_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssub_du(_1, _2); +} +// CHECK-LABEL: @vabsd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vabsd_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vabsd_b(_1, _2); +} +// CHECK-LABEL: @vabsd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vabsd_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vabsd_h(_1, _2); +} +// CHECK-LABEL: @vabsd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vabsd_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vabsd_w(_1, _2); +} +// CHECK-LABEL: @vabsd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vabsd_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vabsd_d(_1, _2); +} +// CHECK-LABEL: @vabsd_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vabsd_bu(_1, _2); +} +// CHECK-LABEL: @vabsd_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vabsd_hu(_1, _2); +} +// CHECK-LABEL: @vabsd_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vabsd_wu(_1, _2); +} +// CHECK-LABEL: @vabsd_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vabsd_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vabsd_du(_1, _2); +} +// CHECK-LABEL: @vmul_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[_1:%.*]], 
<16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); } +// CHECK-LABEL: @vmul_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); } +// CHECK-LABEL: @vmul_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); } +// CHECK-LABEL: @vmul_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); } +// CHECK-LABEL: @vmadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmadd_b(_1, _2, _3); +} +// CHECK-LABEL: @vmadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmadd_h(_1, _2, _3); +} +// CHECK-LABEL: @vmadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmadd_w(_1, _2, _3); +} +// CHECK-LABEL: @vmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmadd_d(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmsub_b(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmsub_h(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmsub_w(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x 
i64> [[TMP0]] +// +v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmsub_d(_1, _2, _3); +} +// CHECK-LABEL: @vdiv_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); } +// CHECK-LABEL: @vdiv_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); } +// CHECK-LABEL: @vdiv_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); } +// CHECK-LABEL: @vdiv_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); } +// CHECK-LABEL: @vdiv_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vdiv_bu(_1, _2); +} +// CHECK-LABEL: @vdiv_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vdiv_hu(_1, _2); +} +// CHECK-LABEL: @vdiv_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vdiv_wu(_1, _2); +} +// CHECK-LABEL: @vdiv_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vdiv_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vdiv_du(_1, _2); +} +// CHECK-LABEL: @vhaddw_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vhaddw_h_b(_1, _2); +} +// CHECK-LABEL: @vhaddw_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vhaddw_w_h(_1, _2); +} +// CHECK-LABEL: @vhaddw_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vhaddw_d_w(_1, _2); +} +// CHECK-LABEL: @vhaddw_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> 
[[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vhaddw_hu_bu(_1, _2); +} +// CHECK-LABEL: @vhaddw_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vhaddw_wu_hu(_1, _2); +} +// CHECK-LABEL: @vhaddw_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vhaddw_du_wu(_1, _2); +} +// CHECK-LABEL: @vhsubw_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vhsubw_h_b(_1, _2); +} +// CHECK-LABEL: @vhsubw_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vhsubw_w_h(_1, _2); +} +// CHECK-LABEL: @vhsubw_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vhsubw_d_w(_1, _2); +} +// CHECK-LABEL: @vhsubw_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vhsubw_hu_bu(_1, _2); +} +// CHECK-LABEL: @vhsubw_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vhsubw_wu_hu(_1, _2); +} +// CHECK-LABEL: @vhsubw_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vhsubw_du_wu(_1, _2); +} +// CHECK-LABEL: @vmod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); } +// CHECK-LABEL: @vmod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); } +// CHECK-LABEL: @vmod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); } +// CHECK-LABEL: @vmod_d( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); } +// CHECK-LABEL: @vmod_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vmod_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmod_bu(_1, _2); +} +// CHECK-LABEL: @vmod_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8u16 vmod_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmod_hu(_1, _2); +} +// CHECK-LABEL: @vmod_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 vmod_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmod_wu(_1, _2); +} +// CHECK-LABEL: @vmod_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2u64 vmod_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmod_du(_1, _2); +} +// CHECK-LABEL: @vreplve_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[_1:%.*]], i32 [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vreplve_b(v16i8 _1, int _2) { + return __builtin_lsx_vreplve_b(_1, _2); +} +// CHECK-LABEL: @vreplve_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[_1:%.*]], i32 [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vreplve_h(v8i16 _1, int _2) { + return __builtin_lsx_vreplve_h(_1, _2); +} +// CHECK-LABEL: @vreplve_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[_1:%.*]], i32 [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vreplve_w(v4i32 _1, int _2) { + return __builtin_lsx_vreplve_w(_1, _2); +} +// CHECK-LABEL: @vreplve_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[_1:%.*]], i32 [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vreplve_d(v2i64 _1, int _2) { + return __builtin_lsx_vreplve_d(_1, _2); +} +// CHECK-LABEL: @vreplvei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); } +// CHECK-LABEL: @vreplvei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vreplvei_h(v8i16 _1) { return __builtin_lsx_vreplvei_h(_1, 1); } +// CHECK-LABEL: @vreplvei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); } +// CHECK-LABEL: @vreplvei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vreplvei_d(v2i64 _1) { return __builtin_lsx_vreplvei_d(_1, 1); } +// CHECK-LABEL: @vpickev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vpickev_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpickev_b(_1, _2); +} +// CHECK-LABEL: @vpickev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vpickev_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpickev_h(_1, _2); +} +// CHECK-LABEL: @vpickev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vpickev_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpickev_w(_1, _2); +} +// CHECK-LABEL: @vpickev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vpickev_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpickev_d(_1, _2); +} +// CHECK-LABEL: @vpickod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vpickod_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpickod_b(_1, _2); +} +// CHECK-LABEL: @vpickod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vpickod_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpickod_h(_1, _2); +} +// CHECK-LABEL: @vpickod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vpickod_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpickod_w(_1, _2); +} +// CHECK-LABEL: @vpickod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vpickod_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpickod_d(_1, _2); +} +// CHECK-LABEL: @vilvh_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vilvh_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vilvh_b(_1, _2); +} +// CHECK-LABEL: @vilvh_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vilvh_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vilvh_h(_1, _2); +} +// CHECK-LABEL: @vilvh_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vilvh_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vilvh_w(_1, _2); +} +// CHECK-LABEL: @vilvh_d( +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vilvh_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vilvh_d(_1, _2); +} +// CHECK-LABEL: @vilvl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vilvl_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vilvl_b(_1, _2); +} +// CHECK-LABEL: @vilvl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vilvl_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vilvl_h(_1, _2); +} +// CHECK-LABEL: @vilvl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vilvl_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vilvl_w(_1, _2); +} +// CHECK-LABEL: @vilvl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vilvl_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vilvl_d(_1, _2); +} +// CHECK-LABEL: @vpackev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vpackev_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpackev_b(_1, _2); +} +// CHECK-LABEL: @vpackev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vpackev_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpackev_h(_1, _2); +} +// CHECK-LABEL: @vpackev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vpackev_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpackev_w(_1, _2); +} +// CHECK-LABEL: @vpackev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vpackev_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpackev_d(_1, _2); +} +// CHECK-LABEL: @vpackod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vpackod_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpackod_b(_1, _2); +} +// CHECK-LABEL: @vpackod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vpackod_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpackod_h(_1, _2); +} +// CHECK-LABEL: @vpackod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vpackod_w(v4i32 _1, v4i32 _2) { + return 
__builtin_lsx_vpackod_w(_1, _2); +} +// CHECK-LABEL: @vpackod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vpackod_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpackod_d(_1, _2); +} +// CHECK-LABEL: @vshuf_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vshuf_h(_1, _2, _3); +} +// CHECK-LABEL: @vshuf_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vshuf_w(_1, _2, _3); +} +// CHECK-LABEL: @vshuf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vshuf_d(_1, _2, _3); +} +// CHECK-LABEL: @vand_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); } +// CHECK-LABEL: @vandi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); } +// CHECK-LABEL: @vor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); } +// CHECK-LABEL: @vori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); } +// CHECK-LABEL: @vnor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); } +// CHECK-LABEL: @vnori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); } +// CHECK-LABEL: @vxor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); } +// CHECK-LABEL: @vxori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); } 
+// CHECK-LABEL: @vbitsel_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vbitsel_v(_1, _2, _3); +} +// CHECK-LABEL: @vbitseli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitseli_b(_1, _2, 1); +} +// CHECK-LABEL: @vshuf4i_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); } +// CHECK-LABEL: @vshuf4i_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); } +// CHECK-LABEL: @vshuf4i_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); } +// CHECK-LABEL: @vreplgr2vr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); } +// CHECK-LABEL: @vreplgr2vr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); } +// CHECK-LABEL: @vreplgr2vr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); } +// CHECK-LABEL: @vreplgr2vr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); } +// CHECK-LABEL: @vpcnt_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); } +// CHECK-LABEL: @vpcnt_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); } +// CHECK-LABEL: @vpcnt_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); } +// CHECK-LABEL: @vpcnt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vpcnt_d(v2i64 _1) { 
return __builtin_lsx_vpcnt_d(_1); }
+// CHECK-LABEL: @vclo_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); }
+// CHECK-LABEL: @vclo_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); }
+// CHECK-LABEL: @vclo_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); }
+// CHECK-LABEL: @vclo_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); }
+// CHECK-LABEL: @vclz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); }
+// CHECK-LABEL: @vclz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); }
+// CHECK-LABEL: @vclz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); }
+// CHECK-LABEL: @vclz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); }
+// CHECK-LABEL: @vpickve2gr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i64 [[TMP0]]
+//
+long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+unsigned int vpickve2gr_bu(v16i8 _1) {
+ return __builtin_lsx_vpickve2gr_bu(_1, 1);
+}
+// CHECK-LABEL: @vpickve2gr_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+unsigned int vpickve2gr_hu(v8i16 _1) {
+ return __builtin_lsx_vpickve2gr_hu(_1, 1);
+}
+// CHECK-LABEL: @vpickve2gr_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+unsigned int vpickve2gr_wu(v4i32 _1) {
+ return __builtin_lsx_vpickve2gr_wu(_1, 1);
+}
+// CHECK-LABEL: @vpickve2gr_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret i64 [[TMP0]]
+//
+unsigned long int vpickve2gr_du(v2i64 _1) {
+ return __builtin_lsx_vpickve2gr_du(_1, 1);
+}
+// CHECK-LABEL: @vinsgr2vr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[_1:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vinsgr2vr_b(v16i8 _1) {
+ return __builtin_lsx_vinsgr2vr_b(_1, 1, 1);
+}
+// CHECK-LABEL: @vinsgr2vr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[_1:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vinsgr2vr_h(v8i16 _1) {
+ return __builtin_lsx_vinsgr2vr_h(_1, 1, 1);
+}
+// CHECK-LABEL: @vinsgr2vr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[_1:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vinsgr2vr_w(v4i32 _1) {
+ return __builtin_lsx_vinsgr2vr_w(_1, 1, 1);
+}
+// CHECK-LABEL: @vinsgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[_1:%.*]], i64 1, i32 1)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vinsgr2vr_d(v2i64 _1) {
+ return __builtin_lsx_vinsgr2vr_d(_1, 1, 1);
+}
+// CHECK-LABEL: @vfadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfadd_s(v4f32 _1, v4f32 _2) {
+ return __builtin_lsx_vfadd_s(_1, _2);
+}
+// CHECK-LABEL: @vfadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfadd_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vfadd_d(_1, _2);
+}
+// CHECK-LABEL: @vfsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfsub_s(v4f32 _1, v4f32 _2) {
+ return __builtin_lsx_vfsub_s(_1, _2);
+}
+// CHECK-LABEL: @vfsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfsub_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vfsub_d(_1, _2);
+}
+// CHECK-LABEL: @vfmul_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmul_s(v4f32 _1, v4f32 _2) {
+ return __builtin_lsx_vfmul_s(_1, _2);
+}
+// CHECK-LABEL: @vfmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmul_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vfmul_d(_1, _2);
+}
+// CHECK-LABEL: @vfdiv_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfdiv_s(v4f32 _1, v4f32 _2) {
+ return __builtin_lsx_vfdiv_s(_1, _2);
+}
+// CHECK-LABEL: @vfdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfdiv_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vfdiv_d(_1, _2);
+}
+// CHECK-LABEL: @vfcvt_h_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) {
+ return __builtin_lsx_vfcvt_h_s(_1, _2);
+}
+// CHECK-LABEL: @vfcvt_s_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vfcvt_s_d(_1, _2);
+}
+// CHECK-LABEL: @vfmin_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmin_s(v4f32 _1, v4f32 _2) {
+ return __builtin_lsx_vfmin_s(_1, _2);
+}
+// CHECK-LABEL: @vfmin_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmin_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vfmin_d(_1, _2);
+}
+// CHECK-LABEL: @vfmina_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmina_s(v4f32 _1, v4f32 _2) {
+ return __builtin_lsx_vfmina_s(_1, _2);
+}
+// CHECK-LABEL: @vfmina_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmina_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vfmina_d(_1, _2);
+}
+// CHECK-LABEL: @vfmax_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmax_s(v4f32 _1, v4f32 _2) {
+ return __builtin_lsx_vfmax_s(_1, _2);
+}
+// CHECK-LABEL: @vfmax_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmax_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vfmax_d(_1, _2);
+}
+// CHECK-LABEL: @vfmaxa_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) {
+ return __builtin_lsx_vfmaxa_s(_1, _2);
+}
+// CHECK-LABEL: @vfmaxa_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vfmaxa_d(_1, _2);
+}
+// CHECK-LABEL: @vfclass_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); }
+// CHECK-LABEL: @vfclass_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); }
+// CHECK-LABEL: @vfsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); }
+// CHECK-LABEL: @vfsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); }
+// CHECK-LABEL: @vfrecip_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfrecip_s(v4f32 _1) { return __builtin_lsx_vfrecip_s(_1); }
+// CHECK-LABEL: @vfrecip_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); }
+// CHECK-LABEL: @vfrint_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); }
+// CHECK-LABEL: @vfrint_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); }
+// CHECK-LABEL: @vfrsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); }
+// CHECK-LABEL: @vfrsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); }
+// CHECK-LABEL: @vflogb_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); }
+// CHECK-LABEL: @vflogb_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); }
+// CHECK-LABEL: @vfcvth_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); }
+// CHECK-LABEL: @vfcvth_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); }
+// CHECK-LABEL: @vfcvtl_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); }
+// CHECK-LABEL: @vfcvtl_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); }
+// CHECK-LABEL: @vftint_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); }
+// CHECK-LABEL: @vftint_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftint_l_d(v2f64 _1) { return __builtin_lsx_vftint_l_d(_1); }
+// CHECK-LABEL: @vftint_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); }
+// CHECK-LABEL: @vftint_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); }
+// CHECK-LABEL: @vftintrz_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); }
+// CHECK-LABEL: @vftintrz_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); }
+// CHECK-LABEL: @vftintrz_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); }
+// CHECK-LABEL: @vftintrz_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); }
+// CHECK-LABEL: @vffint_s_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); }
+// CHECK-LABEL: @vffint_d_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); }
+// CHECK-LABEL: @vffint_s_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); }
+// CHECK-LABEL: @vffint_d_lu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); }
+// CHECK-LABEL: @vandn_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16u8 vandn_v(v16u8 _1, v16u8 _2) {
+ return __builtin_lsx_vandn_v(_1, _2);
+}
+// CHECK-LABEL: @vneg_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); }
+// CHECK-LABEL: @vneg_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); }
+// CHECK-LABEL: @vneg_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); }
+// CHECK-LABEL: @vneg_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vneg_d(v2i64 _1) { return __builtin_lsx_vneg_d(_1); }
+// CHECK-LABEL: @vmuh_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); }
+// CHECK-LABEL: @vmuh_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); }
+// CHECK-LABEL: @vmuh_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); }
+// CHECK-LABEL: @vmuh_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); }
+// CHECK-LABEL: @vmuh_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16u8 vmuh_bu(v16u8 _1, v16u8 _2) {
+ return __builtin_lsx_vmuh_bu(_1, _2);
+}
+// CHECK-LABEL: @vmuh_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8u16 vmuh_hu(v8u16 _1, v8u16 _2) {
+ return __builtin_lsx_vmuh_hu(_1, _2);
+}
+// CHECK-LABEL: @vmuh_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vmuh_wu(v4u32 _1, v4u32 _2) {
+ return __builtin_lsx_vmuh_wu(_1, _2);
+}
+// CHECK-LABEL: @vmuh_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2u64 vmuh_du(v2u64 _1, v2u64 _2) {
+ return __builtin_lsx_vmuh_du(_1, _2);
+}
+// CHECK-LABEL: @vsllwil_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); }
+// CHECK-LABEL: @vsllwil_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); }
+// CHECK-LABEL: @vsllwil_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); }
+// CHECK-LABEL: @vsllwil_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8u16 vsllwil_hu_bu(v16u8 _1) {
+ return __builtin_lsx_vsllwil_hu_bu(_1, 1);
+}
+// CHECK-LABEL: @vsllwil_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vsllwil_wu_hu(v8u16 _1) {
+ return __builtin_lsx_vsllwil_wu_hu(_1, 1);
+}
+// CHECK-LABEL: @vsllwil_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2u64 vsllwil_du_wu(v4u32 _1) {
+ return __builtin_lsx_vsllwil_du_wu(_1, 1);
+}
+// CHECK-LABEL: @vsran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vsran_b_h(v8i16 _1, v8i16 _2) {
+ return __builtin_lsx_vsran_b_h(_1, _2);
+}
+// CHECK-LABEL: @vsran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsran_h_w(v4i32 _1, v4i32 _2) {
+ return __builtin_lsx_vsran_h_w(_1, _2);
+}
+// CHECK-LABEL: @vsran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsran_w_d(v2i64 _1, v2i64 _2) {
+ return __builtin_lsx_vsran_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vssran_b_h(v8i16 _1, v8i16 _2) {
+ return __builtin_lsx_vssran_b_h(_1, _2);
+}
+// CHECK-LABEL: @vssran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vssran_h_w(v4i32 _1, v4i32 _2) {
+ return __builtin_lsx_vssran_h_w(_1, _2);
+}
+// CHECK-LABEL: @vssran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vssran_w_d(v2i64 _1, v2i64 _2) {
+ return __builtin_lsx_vssran_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssran_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) {
+ return __builtin_lsx_vssran_bu_h(_1, _2);
+}
+// CHECK-LABEL: @vssran_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) {
+ return __builtin_lsx_vssran_hu_w(_1, _2);
+}
+// CHECK-LABEL: @vssran_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) {
+ return __builtin_lsx_vssran_wu_d(_1, _2);
+}
+// CHECK-LABEL: @vsrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) {
+ return __builtin_lsx_vsrarn_b_h(_1, _2);
+}
+// CHECK-LABEL: @vsrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) {
+ return __builtin_lsx_vsrarn_h_w(_1, _2);
+}
+// CHECK-LABEL: @vsrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) {
+ return __builtin_lsx_vsrarn_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) {
+ return __builtin_lsx_vssrarn_b_h(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) {
+ return __builtin_lsx_vssrarn_h_w(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) {
+ return __builtin_lsx_vssrarn_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) {
+ return __builtin_lsx_vssrarn_bu_h(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) {
+ return __builtin_lsx_vssrarn_hu_w(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) {
+ return __builtin_lsx_vssrarn_wu_d(_1, _2);
+}
+// CHECK-LABEL: @vsrln_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) {
+ return __builtin_lsx_vsrln_b_h(_1, _2);
+}
+// CHECK-LABEL: @vsrln_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) {
+ return __builtin_lsx_vsrln_h_w(_1, _2);
+}
+// CHECK-LABEL: @vsrln_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) {
+ return __builtin_lsx_vsrln_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrln_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) {
+ return __builtin_lsx_vssrln_bu_h(_1, _2);
+}
+// CHECK-LABEL: @vssrln_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) {
+ return __builtin_lsx_vssrln_hu_w(_1, _2);
+}
+// CHECK-LABEL: @vssrln_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) {
+ return __builtin_lsx_vssrln_wu_d(_1, _2);
+}
+// CHECK-LABEL: @vsrlrn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) {
+ return __builtin_lsx_vsrlrn_b_h(_1, _2);
+}
+// CHECK-LABEL: @vsrlrn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) {
+ return __builtin_lsx_vsrlrn_h_w(_1, _2);
+}
+// CHECK-LABEL: @vsrlrn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) {
+ return __builtin_lsx_vsrlrn_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrlrn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) {
+ return __builtin_lsx_vssrlrn_bu_h(_1, _2);
+}
+// CHECK-LABEL: @vssrlrn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) {
+ return __builtin_lsx_vssrlrn_hu_w(_1, _2);
+}
+// CHECK-LABEL: @vssrlrn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) {
+ return __builtin_lsx_vssrlrn_wu_d(_1, _2);
+}
+// CHECK-LABEL: @vfrstpi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) {
+ return __builtin_lsx_vfrstpi_b(_1, _2, 1);
+}
+// CHECK-LABEL: @vfrstpi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) {
+ return __builtin_lsx_vfrstpi_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vfrstp_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) {
+ return __builtin_lsx_vfrstp_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vfrstp_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) {
+ return __builtin_lsx_vfrstp_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vshuf4i_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) {
+ return __builtin_lsx_vshuf4i_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vbsrl_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); }
+// CHECK-LABEL: @vbsll_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); }
+// CHECK-LABEL: @vextrins_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vextrins_b(v16i8 _1, v16i8 _2) {
+ return __builtin_lsx_vextrins_b(_1, _2, 1);
+}
+// CHECK-LABEL: @vextrins_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vextrins_h(v8i16 _1, v8i16 _2) {
+ return __builtin_lsx_vextrins_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vextrins_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vextrins_w(v4i32 _1, v4i32 _2) {
+ return __builtin_lsx_vextrins_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vextrins_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vextrins_d(v2i64 _1, v2i64 _2) {
+ return __builtin_lsx_vextrins_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vmskltz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); }
+// CHECK-LABEL: @vmskltz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); }
+// CHECK-LABEL: @vmskltz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); }
+// CHECK-LABEL: @vmskltz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); }
+// CHECK-LABEL: @vsigncov_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+v16i8 vsigncov_b(v16i8 _1, v16i8 _2) {
+ return __builtin_lsx_vsigncov_b(_1, _2);
+}
+// CHECK-LABEL: @vsigncov_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsigncov_h(v8i16 _1, v8i16 _2) {
+ return __builtin_lsx_vsigncov_h(_1, _2);
+}
+// CHECK-LABEL: @vsigncov_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsigncov_w(v4i32 _1, v4i32 _2) {
+ return __builtin_lsx_vsigncov_w(_1, _2);
+}
+// CHECK-LABEL: @vsigncov_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsigncov_d(v2i64 _1, v2i64 _2) {
+ return __builtin_lsx_vsigncov_d(_1, _2);
+}
+// CHECK-LABEL: @vfmadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+ return __builtin_lsx_vfmadd_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+ return __builtin_lsx_vfmadd_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vfmsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+ return __builtin_lsx_vfmsub_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+ return __builtin_lsx_vfmsub_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+ return __builtin_lsx_vfnmadd_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+ return __builtin_lsx_vfnmadd_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]], <4 x float> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+ return __builtin_lsx_vfnmsub_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]], <2 x double> [[_3:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+ return __builtin_lsx_vfnmsub_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vftintrne_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); }
+// CHECK-LABEL: @vftintrne_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); }
+// CHECK-LABEL: @vftintrp_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); }
+// CHECK-LABEL: @vftintrp_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); }
+// CHECK-LABEL: @vftintrm_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); }
+// CHECK-LABEL: @vftintrm_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); }
+// CHECK-LABEL: @vftint_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftint_w_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vftint_w_d(_1, _2);
+}
+// CHECK-LABEL: @vffint_s_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+v4f32 vffint_s_l(v2i64 _1, v2i64 _2) {
+ return __builtin_lsx_vffint_s_l(_1, _2);
+}
+// CHECK-LABEL: @vftintrz_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vftintrz_w_d(_1, _2);
+}
+// CHECK-LABEL: @vftintrp_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vftintrp_w_d(_1, _2);
+}
+// CHECK-LABEL: @vftintrm_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vftintrm_w_d(_1, _2);
+}
+// CHECK-LABEL: @vftintrne_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) {
+ return __builtin_lsx_vftintrne_w_d(_1, _2);
+}
+// CHECK-LABEL: @vftintl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); }
+// CHECK-LABEL: @vftinth_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); }
+// CHECK-LABEL: @vffinth_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1); }
+// CHECK-LABEL: @vffintl_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
+v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); }
+// CHECK-LABEL: @vftintrzl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); }
+// CHECK-LABEL: @vftintrzh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); }
+// CHECK-LABEL: @vftintrpl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); }
+// CHECK-LABEL: @vftintrph_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); }
+// CHECK-LABEL: @vftintrml_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); }
+// CHECK-LABEL: @vftintrmh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); }
+// CHECK-LABEL: @vftintrnel_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrnel_l_s(v4f32 _1) {
+ return __builtin_lsx_vftintrnel_l_s(_1);
+}
+// CHECK-LABEL: @vftintrneh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vftintrneh_l_s(v4f32 _1) {
+ return __builtin_lsx_vftintrneh_l_s(_1);
+}
+// CHECK-LABEL: @vfrintrne_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+v4i32 vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); }
+// CHECK-LABEL: @vfrintrne_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP1]]
+//
+v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); }
+// CHECK-LABEL: @vfrintrz_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+v4i32 vfrintrz_s(v4f32 _1) { return __builtin_lsx_vfrintrz_s(_1); }
+// CHECK-LABEL: @vfrintrz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP1]]
+//
+v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); }
+// CHECK-LABEL: @vfrintrp_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); }
+// CHECK-LABEL: @vfrintrp_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP1]]
+//
+v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); }
+// CHECK-LABEL: @vfrintrm_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+v4i32 vfrintrm_s(v4f32 _1) { return __builtin_lsx_vfrintrm_s(_1); }
+// CHECK-LABEL: @vfrintrm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP1]]
+//
+v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); }
+// CHECK-LABEL: @vstelm_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_b(v16i8 _1, void *_2) {
+ return __builtin_lsx_vstelm_b(_1, _2, 1, 1);
+}
+// CHECK-LABEL: @vstelm_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_h(v8i16 _1, void *_2) {
+ return __builtin_lsx_vstelm_h(_1, _2, 2, 1);
+}
+// CHECK-LABEL: @vstelm_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_w(v4i32 _1, void *_2) {
+ return __builtin_lsx_vstelm_w(_1, _2, 4, 1);
+}
+// CHECK-LABEL: @vstelm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_d(v2i64 _1, void *_2) {
+ return __builtin_lsx_vstelm_d(_1, _2, 8, 1);
+}
+// CHECK-LABEL: @vaddwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) {
+ return __builtin_lsx_vaddwev_d_w(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) {
+ return __builtin_lsx_vaddwev_w_h(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) {
+ return __builtin_lsx_vaddwev_h_b(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) {
+ return __builtin_lsx_vaddwod_d_w(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) {
+ return __builtin_lsx_vaddwod_w_h(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) {
+ return __builtin_lsx_vaddwod_h_b(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) {
+ return __builtin_lsx_vaddwev_d_wu(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) {
+ return __builtin_lsx_vaddwev_w_hu(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) {
+ return __builtin_lsx_vaddwev_h_bu(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) {
+ return __builtin_lsx_vaddwod_d_wu(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) {
+ return __builtin_lsx_vaddwod_w_hu(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) {
+ return __builtin_lsx_vaddwod_h_bu(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) {
+ return __builtin_lsx_vaddwev_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) {
+ return __builtin_lsx_vaddwev_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) {
+ return __builtin_lsx_vaddwev_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) {
+ return __builtin_lsx_vaddwod_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) {
+ return __builtin_lsx_vaddwod_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) {
+ return __builtin_lsx_vaddwod_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) {
+ return __builtin_lsx_vsubwev_d_w(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) {
+ return __builtin_lsx_vsubwev_w_h(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) {
+ return __builtin_lsx_vsubwev_h_b(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) {
+ return __builtin_lsx_vsubwod_d_w(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) {
+ return __builtin_lsx_vsubwod_w_h(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) {
+ return __builtin_lsx_vsubwod_h_b(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) {
+ return __builtin_lsx_vsubwev_d_wu(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) {
+ return __builtin_lsx_vsubwev_w_hu(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) {
+ return __builtin_lsx_vsubwev_h_bu(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) {
+ return __builtin_lsx_vsubwod_d_wu(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) {
+ return __builtin_lsx_vsubwod_w_hu(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
+v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) {
+ return __builtin_lsx_vsubwod_h_bu(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) {
+ return __builtin_lsx_vaddwev_q_d(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) {
+ return __builtin_lsx_vaddwod_q_d(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) {
+ return __builtin_lsx_vaddwev_q_du(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) {
+ return __builtin_lsx_vaddwod_q_du(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) {
+ return __builtin_lsx_vsubwev_q_d(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_q_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsubwod_q_d(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_q_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vsubwev_q_du(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_q_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vsubwod_q_du(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_q_du_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vaddwev_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_q_du_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vaddwod_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_d_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vmulwev_d_w(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_w_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vmulwev_w_h(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_h_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vmulwev_h_b(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_d_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vmulwod_d_w(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_w_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vmulwod_w_h(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_h_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vmulwod_h_b(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_d_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vmulwev_d_wu(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_w_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vmulwev_w_hu(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_h_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vmulwev_h_bu(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_d_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vmulwod_d_wu(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_w_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vmulwod_w_hu(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_h_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vmulwod_h_bu(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_d_wu_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vmulwev_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_w_hu_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vmulwev_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_h_bu_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vmulwev_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_d_wu_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vmulwod_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_w_hu_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vmulwod_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_h_bu_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vmulwod_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_q_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vmulwev_q_d(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_q_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vmulwod_q_d(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_q_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vmulwev_q_du(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_q_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vmulwod_q_du(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_q_du_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vmulwev_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_q_du_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vmulwod_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vhaddw_q_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vhaddw_q_d(_1, _2);
+}
+// CHECK-LABEL: @vhaddw_qu_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vhaddw_qu_du(_1, _2);
+}
+// CHECK-LABEL: @vhsubw_q_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vhsubw_q_d(_1, _2);
+}
+// CHECK-LABEL: @vhsubw_qu_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vhsubw_qu_du(_1, _2);
+}
+// CHECK-LABEL: @vmaddwev_d_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
+  return __builtin_lsx_vmaddwev_d_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_w_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
+  return __builtin_lsx_vmaddwev_w_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_h_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
+  return __builtin_lsx_vmaddwev_h_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_d_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
+  return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_w_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
+  return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_h_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
+  return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_d_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
+  return __builtin_lsx_vmaddwod_d_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_w_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
+  return __builtin_lsx_vmaddwod_w_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_h_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
+  return __builtin_lsx_vmaddwod_h_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_d_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
+  return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_w_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
+  return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_h_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
+  return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_d_wu_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
+  return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_w_hu_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
+  return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_h_bu_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
+  return __builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_d_wu_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[_1:%.*]], <4 x i32> [[_2:%.*]], <4 x i32> [[_3:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
+  return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_w_hu_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[_1:%.*]], <8 x i16> [[_2:%.*]], <8 x i16> [[_3:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
+  return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_h_bu_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
+  return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_q_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
+  return __builtin_lsx_vmaddwev_q_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_q_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
+  return __builtin_lsx_vmaddwod_q_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_q_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
+  return __builtin_lsx_vmaddwev_q_du(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_q_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
+  return __builtin_lsx_vmaddwod_q_du(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_q_du_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
+  return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_q_du_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], <2 x i64> [[_3:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
+  return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vrotr_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vrotr_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vrotr_b(_1, _2);
+}
+// CHECK-LABEL: @vrotr_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vrotr_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vrotr_h(_1, _2);
+}
+// CHECK-LABEL: @vrotr_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vrotr_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vrotr_w(_1, _2);
+}
+// CHECK-LABEL: @vrotr_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vrotr_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vrotr_d(_1, _2);
+}
+// CHECK-LABEL: @vadd_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); }
+// CHECK-LABEL: @vsub_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); }
+// CHECK-LABEL: @vldrepl_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); }
+// CHECK-LABEL: @vldrepl_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); }
+// CHECK-LABEL: @vldrepl_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); }
+// CHECK-LABEL: @vldrepl_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vldrepl_d(void *_1) { return __builtin_lsx_vldrepl_d(_1, 8); }
+// CHECK-LABEL: @vmskgez_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); }
+// CHECK-LABEL: @vmsknz_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); }
+// CHECK-LABEL: @vexth_h_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); }
+// CHECK-LABEL: @vexth_w_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); }
+// CHECK-LABEL: @vexth_d_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); }
+// CHECK-LABEL: @vexth_q_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); }
+// CHECK-LABEL: @vexth_hu_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); }
+// CHECK-LABEL: @vexth_wu_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); }
+// CHECK-LABEL: @vexth_du_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); }
+// CHECK-LABEL: @vexth_qu_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); }
+// CHECK-LABEL: @vrotri_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); }
+// CHECK-LABEL: @vrotri_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); }
+// CHECK-LABEL: @vrotri_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); }
+// CHECK-LABEL: @vrotri_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); }
+// CHECK-LABEL: @vextl_q_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); }
+// CHECK-LABEL: @vsrlni_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsrlni_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlni_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrlni_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlni_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrlni_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlni_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrlni_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlrni_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsrlrni_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlrni_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrlrni_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlrni_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrlrni_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlrni_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrlrni_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrlni_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrlni_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrlni_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrlni_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_bu_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrlni_bu_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_hu_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrlni_hu_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_wu_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrlni_wu_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_du_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrlni_du_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrlrni_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrlrni_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrlrni_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrlrni_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_bu_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_hu_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_wu_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_du_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrlrni_du_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrani_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsrani_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrani_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrani_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrani_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrani_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrani_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrani_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrarni_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsrarni_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrarni_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrarni_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrarni_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrarni_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrarni_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrarni_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrani_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrani_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrani_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrani_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_bu_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrani_bu_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_hu_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrani_hu_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_wu_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrani_wu_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_du_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrani_du_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrarni_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrarni_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrarni_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrarni_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_bu_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrarni_bu_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_hu_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrarni_hu_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_wu_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrarni_wu_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_du_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrarni_du_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vpermi_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vpermi_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vpermi_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vld(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); }
+// CHECK-LABEL: @vst(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret void
+//
+void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); }
+// CHECK-LABEL: @vssrlrn_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrlrn_b_h(_1, _2);
+}
+// CHECK-LABEL: @vssrlrn_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrlrn_h_w(_1, _2);
+}
+// CHECK-LABEL: @vssrlrn_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrlrn_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrln_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrln_b_h(_1, _2);
+}
+// CHECK-LABEL: @vssrln_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrln_h_w(_1, _2);
+}
+// CHECK-LABEL: @vssrln_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrln_w_d(_1, _2);
+}
+// CHECK-LABEL: @vorn_v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); }
+// CHECK-LABEL: @vldi(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vldi() { return __builtin_lsx_vldi(1); }
+// CHECK-LABEL: @vshuf_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[_1:%.*]], <16 x i8> [[_2:%.*]], <16 x i8> [[_3:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) {
+  return __builtin_lsx_vshuf_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vldx(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); }
+// CHECK-LABEL: @vstx(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1)
+// CHECK-NEXT:    ret void
+//
+void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); }
+// CHECK-LABEL: @vextl_qu_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); }
+// CHECK-LABEL: @bnz_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); }
+// CHECK-LABEL: @bnz_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); }
+// CHECK-LABEL: @bnz_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); }
+// CHECK-LABEL: @bnz_v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); }
+// CHECK-LABEL: @bnz_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); }
+// CHECK-LABEL: @bz_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); }
+// CHECK-LABEL: @bz_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[_1:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); }
+// CHECK-LABEL: @bz_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[_1:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); }
+// CHECK-LABEL: @bz_v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[_1:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); }
+// CHECK-LABEL: @bz_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[_1:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); }
+// CHECK-LABEL: @vfcmp_caf_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_caf_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_caf_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_caf_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_ceq_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_ceq_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_ceq_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_ceq_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cle_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_cle_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cle_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cle_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_clt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_clt_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_clt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_clt_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cne_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cne_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cor_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cor_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cueq_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cueq_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cule_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cule_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cult_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cult_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cun_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cune_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cune_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cun_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_saf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_saf_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_saf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_saf_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_seq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_seq_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_seq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_seq_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sle_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> 
[[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sle_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_slt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_slt_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_slt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_slt_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sne_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sne_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sor_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sor_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sueq_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sueq_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sule_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sule_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// 
CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sult_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sult_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sun_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[_1:%.*]], <2 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sune_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sune_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[_1:%.*]], <4 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sun_s(_1, _2); +} +// CHECK-LABEL: @vrepli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +v16i8 vrepli_b() { return __builtin_lsx_vrepli_b(1); } +// CHECK-LABEL: @vrepli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) +// CHECK-NEXT: ret <2 x i64> [[TMP0]] +// +v2i64 vrepli_d() { return __builtin_lsx_vrepli_d(1); } +// CHECK-LABEL: @vrepli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +v8i16 vrepli_h() { return __builtin_lsx_vrepli_h(1); } +// CHECK-LABEL: @vrepli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 vrepli_w() { return __builtin_lsx_vrepli_w(1); } From 535408eedbf812d9038bd40a0faae5001d2256cf Mon Sep 17 00:00:00 2001 From: chenli Date: Fri, 27 Oct 2023 15:58:55 +0800 Subject: [PATCH 125/144] [LoongArch][CodeGen] Add LASX builtin testcases --- .../LoongArch/lasx/builtin-alias-error.c | 1373 +++++ .../CodeGen/LoongArch/lasx/builtin-alias.c | 4430 ++++++++++++++++ .../CodeGen/LoongArch/lasx/builtin-error.c | 1392 ++++++ clang/test/CodeGen/LoongArch/lasx/builtin.c | 4452 +++++++++++++++++ 4 files changed, 11647 insertions(+) create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-alias.c create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-error.c create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin.c diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c 
b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c new file mode 100644 index 00000000000000..2a3862bbe3c18c --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c @@ -0,0 +1,1373 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s + +#include <lasxintrin.h> + +v32i8 xvslli_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}} + return res; +} + +v16i16 xvslli_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}} + return res; +} + +v8i32 xvslli_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}} + return res; +} + +v4i64 xvslli_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}} + return res; +} + +v32i8 xvsrai_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}} + return res; +} + +v16i16 xvsrai_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}} + return res; +} + +v8i32 xvsrai_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}} + return res; +} + +v4i64 xvsrai_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}} + 
return res; +} + +v32i8 xvsrari_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}} + return res; +} + +v16i16 xvsrari_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}} + return res; +} + +v8i32 xvsrari_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}} + return res; +} + +v4i64 xvsrari_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} + return res; +} + +v32i8 xvsrli_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} + return res; +} + +v16i16 xvsrli_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} + return res; +} + +v8i32 xvsrli_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} + return res; +} + +v4i64 xvsrli_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} + return res; +} + +v32i8 xvsrlri_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the 
valid range [0, 7]}} + res |= __lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} + return res; +} + +v16i16 xvsrlri_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} + return res; +} + +v8i32 xvsrlri_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} + return res; +} + +v4i64 xvsrlri_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}} + return res; +} + +v32u8 xvbitclri_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} + return res; +} + +v16u16 xvbitclri_h(v16u16 _1, int var) { + v16u16 res = __lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} + return res; +} + +v8u32 xvbitclri_w(v8u32 _1, int var) { + v8u32 res = __lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}} + return res; +} + +v4u64 xvbitclri_d(v4u64 _1, int var) { + v4u64 res = __lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} + return res; +} + +v32u8 xvbitseti_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} + return res; +} + +v16u16 xvbitseti_h(v16u16 _1, 
int var) { + v16u16 res = __lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} + return res; +} + +v8u32 xvbitseti_w(v8u32 _1, int var) { + v8u32 res = __lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} + return res; +} + +v4u64 xvbitseti_d(v4u64 _1, int var) { + v4u64 res = __lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} + return res; +} + +v32u8 xvbitrevi_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}} + return res; +} + +v16u16 xvbitrevi_h(v16u16 _1, int var) { + v16u16 res = __lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} + return res; +} + +v8u32 xvbitrevi_w(v8u32 _1, int var) { + v8u32 res = __lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} + return res; +} + +v4u64 xvbitrevi_d(v4u64 _1, int var) { + v4u64 res = __lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}} + return res; +} + +v32i8 xvaddi_bu(v32i8 _1, int var) { + v32i8 res = __lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} + return res; +} + +v16i16 xvaddi_hu(v16i16 _1, int var) { + v16i16 res = __lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_hu(_1, 32); // 
expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} + return res; +} + +v8i32 xvaddi_wu(v8i32 _1, int var) { + v8i32 res = __lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} + return res; +} + +v4i64 xvaddi_du(v4i64 _1, int var) { + v4i64 res = __lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} + return res; +} + +v32i8 xvsubi_bu(v32i8 _1, int var) { + v32i8 res = __lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} + return res; +} + +v16i16 xvsubi_hu(v16i16 _1, int var) { + v16i16 res = __lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} + return res; +} + +v8i32 xvsubi_wu(v8i32 _1, int var) { + v8i32 res = __lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} + return res; +} + +v4i64 xvsubi_du(v4i64 _1, int var) { + v4i64 res = __lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} + return res; +} + +v32i8 xvmaxi_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} + return res; +} + +v16i16 xvmaxi_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} + return res; +} + +v8i32 xvmaxi_w(v8i32 _1, int var) 
{ + v8i32 res = __lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} + return res; +} + +v4i64 xvmaxi_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} + return res; +} + +v32u8 xvmaxi_bu(v32u8 _1, int var) { + v32u8 res = __lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}} + return res; +} + +v16u16 xvmaxi_hu(v16u16 _1, int var) { + v16u16 res = __lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}} + return res; +} + +v8u32 xvmaxi_wu(v8u32 _1, int var) { + v8u32 res = __lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}} + return res; +} + +v4u64 xvmaxi_du(v4u64 _1, int var) { + v4u64 res = __lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}} + return res; +} + +v32i8 xvmini_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}} + return res; +} + +v16i16 xvmini_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}} + return res; +} + +v8i32 xvmini_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_w(_1, 
var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}} + return res; +} + +v4i64 xvmini_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}} + return res; +} + +v32u8 xvmini_bu(v32u8 _1, int var) { + v32u8 res = __lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}} + return res; +} + +v16u16 xvmini_hu(v16u16 _1, int var) { + v16u16 res = __lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}} + return res; +} + +v8u32 xvmini_wu(v8u32 _1, int var) { + v8u32 res = __lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}} + return res; +} + +v4u64 xvmini_du(v4u64 _1, int var) { + v4u64 res = __lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}} + return res; +} + +v32i8 xvseqi_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} + return res; +} + +v16i16 xvseqi_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} + return res; +} + +v8i32 xvseqi_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} + return res; +} + +v4i64 xvseqi_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 
15]}} + res |= __lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}} + return res; +} + +v32i8 xvslti_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}} + return res; +} + +v16i16 xvslti_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} + return res; +} + +v8i32 xvslti_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} + return res; +} + +v4i64 xvslti_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}} + return res; +} + +v32i8 xvslti_bu(v32u8 _1, int var) { + v32i8 res = __lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} + return res; +} + +v16i16 xvslti_hu(v16u16 _1, int var) { + v16i16 res = __lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} + return res; +} + +v8i32 xvslti_wu(v8u32 _1, int var) { + v8i32 res = __lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} + return res; +} + +v4i64 xvslti_du(v4u64 _1, int var) { + v4i64 res = __lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}} + return res; +} + +v32i8 
xvslei_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}} + return res; +} + +v16i16 xvslei_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} + return res; +} + +v8i32 xvslei_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}} + return res; +} + +v4i64 xvslei_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}} + return res; +} + +v32i8 xvslei_bu(v32u8 _1, int var) { + v32i8 res = __lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} + return res; +} + +v16i16 xvslei_hu(v16u16 _1, int var) { + v16i16 res = __lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} + return res; +} + +v8i32 xvslei_wu(v8u32 _1, int var) { + v8i32 res = __lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} + return res; +} + +v4i64 xvslei_du(v4u64 _1, int var) { + v4i64 res = __lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} + return res; +} + +v32i8 xvsat_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= 
__lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}} + return res; +} + +v16i16 xvsat_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}} + return res; +} + +v8i32 xvsat_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}} + return res; +} + +v4i64 xvsat_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}} + return res; +} + +v32u8 xvsat_bu(v32u8 _1, int var) { + v32u8 res = __lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}} + return res; +} + +v16u16 xvsat_hu(v16u16 _1, int var) { + v16u16 res = __lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}} + return res; +} + +v8u32 xvsat_wu(v8u32 _1, int var) { + v8u32 res = __lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}} + return res; +} + +v4u64 xvsat_du(v4u64 _1, int var) { + v4u64 res = __lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}} + return res; +} + +v32i8 xvrepl128vei_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}} + return res; +} + +v16i16 xvrepl128vei_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 
4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}} + return res; +} + +v8i32 xvrepl128vei_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}} + return res; +} + +v4i64 xvrepl128vei_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}} + return res; +} + +v32u8 xvandi_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}} + return res; +} + +v32u8 xvori_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}} + return res; +} + +v32u8 xvnori_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}} + return res; +} + +v32u8 xvxori_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}} + return res; +} + +v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) { + v32u8 res = __lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}} + return res; +} + +v32i8 xvshuf4i_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 
255]}} + res |= __lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}} + return res; +} + +v16i16 xvshuf4i_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}} + return res; +} + +v8i32 xvshuf4i_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}} + return res; +} + +v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}} + return res; +} + +v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}} + return res; +} + +v4i64 xvpermi_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}} + return res; +} + +v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}} + return res; +} + +v16i16 xvsllwil_h_b(v32i8 _1, int var) { + v16i16 res = __lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}} + return res; +} + +v8i32 xvsllwil_w_h(v16i16 _1, int var) { + v8i32 res = __lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' 
must be a constant integer}} + return res; +} + +v4i64 xvsllwil_d_w(v8i32 _1, int var) { + v4i64 res = __lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}} + return res; +} + +v16u16 xvsllwil_hu_bu(v32u8 _1, int var) { + v16u16 res = __lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} + return res; +} + +v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { + v8u32 res = __lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} + return res; +} + +v4u64 xvsllwil_du_wu(v8u32 _1, int var) { + v4u64 res = __lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} + return res; +} + +v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} + return res; +} + +v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} + return res; +} + +v32i8 xvbsrl_v(v32i8 _1, int var) { + v32i8 res = __lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} + return res; +} + +v32i8 xvbsll_v(v32i8 _1, int var) { + v32i8 res = __lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} + return res; +} + +v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) { + v32i8 
res = __lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}} + return res; +} + +v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}} + return res; +} + +v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}} + return res; +} + +v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}} + return res; +} + +v32i8 xvld(void *_1, int var) { + v32i8 res = __lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}} + return res; +} + +void xvst(v32i8 _1, void *_2, int var) { + __lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + __lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + __lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} +} + +void xvstelm_b(v32i8 _1, void * _2, int var) { + __lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} + __lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} + __lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +} + +void xvstelm_h(v16i16 _1, void * _2, int var) { + __lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} + __lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} + __lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +} + +void xvstelm_w(v8i32 _1, void * _2, int var) { + __lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} + __lasx_xvstelm_w(_1, _2, 512, 
1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} + __lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +} + +void xvstelm_d(v4i64 _1, void * _2, int var) { + __lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} + __lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} + __lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +} + +void xvstelm_b_idx(v32i8 _1, void * _2, int var) { + __lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + __lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + __lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +} + +void xvstelm_h_idx(v16i16 _1, void * _2, int var) { + __lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + __lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + __lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +} + +void xvstelm_w_idx(v8i32 _1, void * _2, int var) { + __lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + __lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + __lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +} + +void xvstelm_d_idx(v4i64 _1, void * _2, int var) { + __lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + __lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + __lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +} + +v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} + return res; +} + +v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} + return res; +} + +v8i32 xvpickve_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} + return res; +} + 
+v4i64 xvpickve_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
+  res |= __lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  res |= __lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvldi(int var) {
+  v4i64 res = __lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}}
+  res |= __lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}}
+  res |= __lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvinsgr2vr_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvinsgr2vr_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
+  res |= __lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  res |= __lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvldrepl_b(void *_1, int var) {
+  v32i8 res = __lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
+  res |= __lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
+  res |= __lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvldrepl_h(void *_1, int var) {
+  v16i16 res = __lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}}
+  res |= __lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}}
+  res |= __lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvldrepl_w(void *_1, int var) {
+  v8i32 res = __lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}}
+  res |= __lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}}
+  res |= __lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvldrepl_d(void *_1, int var) {
+  v4i64 res = __lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}}
+  res |= __lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}}
+  res |= __lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}}
+  return res;
+}
+
+int xvpickve2gr_w(v8i32 _1, int var) {
+  int res = __lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}}
+  return res;
+}
+
+unsigned int xvpickve2gr_wu(v8i32 _1, int var) {
+  unsigned int res = __lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}}
+  return res;
+}
+
+long xvpickve2gr_d(v4i64 _1, int var) {
+  long res = __lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
+  res |= __lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  res |= __lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}}
+  return res;
+}
+
+unsigned long int xvpickve2gr_du(v4i64 _1, int var) {
+  unsigned long int res = __lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
+  res |= __lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  res |= __lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvrotri_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvrotri_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvrotri_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvrotri_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) {
+  v32i8 res = __lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) {
+  v16i16 res = __lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) {
+  v8i32 res = __lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) {
+  v4i64 res = __lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) {
+  v32i8 res = __lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) {
+  v16i16 res = __lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) {
+  v8i32 res = __lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) {
+  v4i64 res = __lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) {
+  v32i8 res = __lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) {
+  v16i16 res = __lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) {
+  v8i32 res = __lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) {
+  v4i64 res = __lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) {
+  v32u8 res = __lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}}
+  return res;
+}
+
+v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) {
+  v16u16 res = __lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}}
+  return res;
+}
+
+v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) {
+  v8u32 res = __lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}}
+  return res;
+}
+
+v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) {
+  v4u64 res = __lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) {
+  v32i8 res = __lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) {
+  v16i16 res = __lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) {
+  v8i32 res = __lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) {
+  v4i64 res = __lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) {
+  v32u8 res = __lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}}
+  return res;
+}
+
+v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) {
+  v16u16 res = __lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}}
+  return res;
+}
+
+v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) {
+  v8u32 res = __lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}}
+  return res;
+}
+
+v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) {
+  v4u64 res = __lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) {
+  v32i8 res = __lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) {
+  v16i16 res = __lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) {
+  v8i32 res = __lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) {
+  v4i64 res = __lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) {
+  v32i8 res = __lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) {
+  v16i16 res = __lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) {
+  v8i32 res = __lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) {
+  v4i64 res = __lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) {
+  v32i8 res = __lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) {
+  v16i16 res = __lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) {
+  v8i32 res = __lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) {
+  v4i64 res = __lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) {
+  v32u8 res = __lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}}
+  return res;
+}
+
+v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) {
+  v16u16 res = __lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}}
+  return res;
+}
+
+v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) {
+  v8u32 res = __lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}}
+  return res;
+}
+
+v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) {
+  v4u64 res = __lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) {
+  v32i8 res = __lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) {
+  v16i16 res = __lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) {
+  v8i32 res = __lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) {
+  v4i64 res = __lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) {
+  v32u8 res = __lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}}
+  return res;
+}
+
+v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) {
+  v16u16 res = __lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}}
+  return res;
+}
+
+v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) {
+  v8u32 res = __lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}}
+  return res;
+}
+
+v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) {
+  v4u64 res = __lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}}
+  return res;
+}
+
+v4f64 xvpickve_d_f(v4f64 _1, int var) {
+  v4f64 res = __lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
+  res += __lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  res += __lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}}
+  return res;
+}
+
+v8f32 xvpickve_w_f(v8f32 _1, int var) {
+  v8f32 res = __lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res += __lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res += __lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvrepli_b(int var) {
+  v32i8 res = __lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvrepli_d(int var) {
+  v4i64 res = __lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvrepli_h(int var) {
+  v16i16 res = __lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvrepli_w(int var) {
+  v8i32 res = __lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}}
+  return res;
+}
diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
new file mode 100644
index 00000000000000..09b2d5fcacf530
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
@@ -0,0 +1,4430 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s
+
+#include <lasxintrin.h>
+
+// CHECK-LABEL: @xvsll_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT:    ret <32 x i8> [[TMP0]]
+//
+v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); }
+// CHECK-LABEL: @xvsll_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
[[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } +// CHECK-LABEL: @xvsll_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } +// CHECK-LABEL: @xvsll_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } +// CHECK-LABEL: @xvslli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } +// CHECK-LABEL: @xvslli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } +// CHECK-LABEL: @xvslli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } +// CHECK-LABEL: @xvslli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } +// CHECK-LABEL: @xvsra_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } +// CHECK-LABEL: @xvsra_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } +// CHECK-LABEL: @xvsra_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); } +// CHECK-LABEL: @xvsra_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } +// CHECK-LABEL: @xvsrai_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } +// CHECK-LABEL: @xvsrai_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); 
} +// CHECK-LABEL: @xvsrai_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } +// CHECK-LABEL: @xvsrai_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } +// CHECK-LABEL: @xvsrar_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } +// CHECK-LABEL: @xvsrar_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } +// CHECK-LABEL: @xvsrar_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } +// CHECK-LABEL: @xvsrar_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); } +// CHECK-LABEL: @xvsrari_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } +// CHECK-LABEL: @xvsrari_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } +// CHECK-LABEL: @xvsrari_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } +// CHECK-LABEL: @xvsrari_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } +// CHECK-LABEL: @xvsrl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } +// CHECK-LABEL: @xvsrl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } +// CHECK-LABEL: @xvsrl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: 
ret <8 x i32> [[TMP0]] +// +v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } +// CHECK-LABEL: @xvsrl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); } +// CHECK-LABEL: @xvsrli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } +// CHECK-LABEL: @xvsrli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } +// CHECK-LABEL: @xvsrli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } +// CHECK-LABEL: @xvsrli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); } +// CHECK-LABEL: @xvsrlr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } +// CHECK-LABEL: @xvsrlr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } +// CHECK-LABEL: @xvsrlr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } +// CHECK-LABEL: @xvsrlr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); } +// CHECK-LABEL: @xvsrlri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); } +// CHECK-LABEL: @xvsrlri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); } +// CHECK-LABEL: @xvsrlri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); } +// CHECK-LABEL: @xvsrlri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); } +// CHECK-LABEL: @xvbitclr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); } +// CHECK-LABEL: @xvbitclr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); } +// CHECK-LABEL: @xvbitclr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); } +// CHECK-LABEL: @xvbitclr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); } +// CHECK-LABEL: @xvbitclri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); } +// CHECK-LABEL: @xvbitclri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); } +// CHECK-LABEL: @xvbitclri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); } +// CHECK-LABEL: @xvbitclri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); } +// CHECK-LABEL: @xvbitset_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); } +// CHECK-LABEL: @xvbitset_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); } +// CHECK-LABEL: @xvbitset_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); } +// CHECK-LABEL: @xvbitset_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> 
[[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); } +// CHECK-LABEL: @xvbitseti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); } +// CHECK-LABEL: @xvbitseti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); } +// CHECK-LABEL: @xvbitseti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); } +// CHECK-LABEL: @xvbitseti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); } +// CHECK-LABEL: @xvbitrev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); } +// CHECK-LABEL: @xvbitrev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); } +// CHECK-LABEL: @xvbitrev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); } +// CHECK-LABEL: @xvbitrev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); } +// CHECK-LABEL: @xvbitrevi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); } +// CHECK-LABEL: @xvbitrevi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); } +// CHECK-LABEL: @xvbitrevi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); } +// CHECK-LABEL: @xvbitrevi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 
1); } +// CHECK-LABEL: @xvadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); } +// CHECK-LABEL: @xvadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); } +// CHECK-LABEL: @xvadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); } +// CHECK-LABEL: @xvadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); } +// CHECK-LABEL: @xvaddi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); } +// CHECK-LABEL: @xvaddi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); } +// CHECK-LABEL: @xvaddi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); } +// CHECK-LABEL: @xvaddi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); } +// CHECK-LABEL: @xvsub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); } +// CHECK-LABEL: @xvsub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); } +// CHECK-LABEL: @xvsub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); } +// CHECK-LABEL: @xvsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); } +// CHECK-LABEL: @xvsubi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], 
i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); } +// CHECK-LABEL: @xvsubi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); } +// CHECK-LABEL: @xvsubi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); } +// CHECK-LABEL: @xvsubi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); } +// CHECK-LABEL: @xvmax_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); } +// CHECK-LABEL: @xvmax_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); } +// CHECK-LABEL: @xvmax_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); } +// CHECK-LABEL: @xvmax_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); } +// CHECK-LABEL: @xvmaxi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); } +// CHECK-LABEL: @xvmaxi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); } +// CHECK-LABEL: @xvmaxi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); } +// CHECK-LABEL: @xvmaxi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); } +// CHECK-LABEL: @xvmax_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); } +// CHECK-LABEL: @xvmax_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); } +// CHECK-LABEL: @xvmax_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); } +// CHECK-LABEL: @xvmax_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); } +// CHECK-LABEL: @xvmaxi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); } +// CHECK-LABEL: @xvmaxi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); } +// CHECK-LABEL: @xvmaxi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); } +// CHECK-LABEL: @xvmaxi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); } +// CHECK-LABEL: @xvmin_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); } +// CHECK-LABEL: @xvmin_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); } +// CHECK-LABEL: @xvmin_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); } +// CHECK-LABEL: @xvmin_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); } +// CHECK-LABEL: @xvmini_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); } +// CHECK-LABEL: @xvmini_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); } +// 
CHECK-LABEL: @xvmini_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); } +// CHECK-LABEL: @xvmini_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); } +// CHECK-LABEL: @xvmin_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); } +// CHECK-LABEL: @xvmin_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); } +// CHECK-LABEL: @xvmin_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); } +// CHECK-LABEL: @xvmin_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); } +// CHECK-LABEL: @xvmini_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); } +// CHECK-LABEL: @xvmini_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); } +// CHECK-LABEL: @xvmini_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); } +// CHECK-LABEL: @xvmini_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); } +// CHECK-LABEL: @xvseq_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); } +// CHECK-LABEL: @xvseq_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); } +// CHECK-LABEL: @xvseq_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret 
<8 x i32> [[TMP0]] +// +v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); } +// CHECK-LABEL: @xvseq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); } +// CHECK-LABEL: @xvseqi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); } +// CHECK-LABEL: @xvseqi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); } +// CHECK-LABEL: @xvseqi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); } +// CHECK-LABEL: @xvseqi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); } +// CHECK-LABEL: @xvslt_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); } +// CHECK-LABEL: @xvslt_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); } +// CHECK-LABEL: @xvslt_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); } +// CHECK-LABEL: @xvslt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); } +// CHECK-LABEL: @xvslti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); } +// CHECK-LABEL: @xvslti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); } +// CHECK-LABEL: @xvslti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); } +// CHECK-LABEL: @xvslti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1:%.*]], 
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); }
+// CHECK-LABEL: @xvslt_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); }
+// CHECK-LABEL: @xvslt_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); }
+// CHECK-LABEL: @xvslt_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); }
+// CHECK-LABEL: @xvslt_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); }
+// CHECK-LABEL: @xvslti_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); }
+// CHECK-LABEL: @xvslti_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); }
+// CHECK-LABEL: @xvslti_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); }
+// CHECK-LABEL: @xvslti_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); }
+// CHECK-LABEL: @xvsle_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); }
+// CHECK-LABEL: @xvsle_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); }
+// CHECK-LABEL: @xvsle_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); }
+// CHECK-LABEL: @xvsle_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); }
+// CHECK-LABEL: @xvslei_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); }
+// CHECK-LABEL: @xvslei_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); }
+// CHECK-LABEL: @xvslei_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); }
+// CHECK-LABEL: @xvslei_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); }
+// CHECK-LABEL: @xvsle_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); }
+// CHECK-LABEL: @xvsle_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); }
+// CHECK-LABEL: @xvsle_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); }
+// CHECK-LABEL: @xvsle_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); }
+// CHECK-LABEL: @xvslei_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); }
+// CHECK-LABEL: @xvslei_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); }
+// CHECK-LABEL: @xvslei_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); }
+// CHECK-LABEL: @xvslei_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); }
+// CHECK-LABEL: @xvsat_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); }
+// CHECK-LABEL: @xvsat_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); }
+// CHECK-LABEL: @xvsat_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); }
+// CHECK-LABEL: @xvsat_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); }
+// CHECK-LABEL: @xvsat_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); }
+// CHECK-LABEL: @xvsat_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); }
+// CHECK-LABEL: @xvsat_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); }
+// CHECK-LABEL: @xvsat_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); }
+// CHECK-LABEL: @xvadda_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); }
+// CHECK-LABEL: @xvadda_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); }
+// CHECK-LABEL: @xvadda_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); }
+// CHECK-LABEL: @xvadda_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); }
+// CHECK-LABEL: @xvsadd_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); }
+// CHECK-LABEL: @xvsadd_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); }
+// CHECK-LABEL: @xvsadd_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); }
+// CHECK-LABEL: @xvsadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); }
+// CHECK-LABEL: @xvsadd_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); }
+// CHECK-LABEL: @xvsadd_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); }
+// CHECK-LABEL: @xvsadd_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); }
+// CHECK-LABEL: @xvsadd_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); }
+// CHECK-LABEL: @xvavg_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); }
+// CHECK-LABEL: @xvavg_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); }
+// CHECK-LABEL: @xvavg_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); }
+// CHECK-LABEL: @xvavg_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); }
+// CHECK-LABEL: @xvavg_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); }
+// CHECK-LABEL: @xvavg_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); }
+// CHECK-LABEL: @xvavg_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); }
+// CHECK-LABEL: @xvavg_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); }
+// CHECK-LABEL: @xvavgr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); }
+// CHECK-LABEL: @xvavgr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); }
+// CHECK-LABEL: @xvavgr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); }
+// CHECK-LABEL: @xvavgr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); }
+// CHECK-LABEL: @xvavgr_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); }
+// CHECK-LABEL: @xvavgr_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); }
+// CHECK-LABEL: @xvavgr_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); }
+// CHECK-LABEL: @xvavgr_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); }
+// CHECK-LABEL: @xvssub_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); }
+// CHECK-LABEL: @xvssub_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); }
+// CHECK-LABEL: @xvssub_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); }
+// CHECK-LABEL: @xvssub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); }
+// CHECK-LABEL: @xvssub_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); }
+// CHECK-LABEL: @xvssub_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); }
+// CHECK-LABEL: @xvssub_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); }
+// CHECK-LABEL: @xvssub_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); }
+// CHECK-LABEL: @xvabsd_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); }
+// CHECK-LABEL: @xvabsd_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); }
+// CHECK-LABEL: @xvabsd_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); }
+// CHECK-LABEL: @xvabsd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); }
+// CHECK-LABEL: @xvabsd_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); }
+// CHECK-LABEL: @xvabsd_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); }
+// CHECK-LABEL: @xvabsd_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); }
+// CHECK-LABEL: @xvabsd_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); }
+// CHECK-LABEL: @xvmul_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); }
+// CHECK-LABEL: @xvmul_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); }
+// CHECK-LABEL: @xvmul_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); }
+// CHECK-LABEL: @xvmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); }
+// CHECK-LABEL: @xvmadd_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmadd_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmadd_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); }
+// CHECK-LABEL: @xvdiv_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); }
+// CHECK-LABEL: @xvdiv_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); }
+// CHECK-LABEL: @xvdiv_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); }
+// CHECK-LABEL: @xvdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); }
+// CHECK-LABEL: @xvdiv_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); }
+// CHECK-LABEL: @xvdiv_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); }
+// CHECK-LABEL: @xvdiv_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); }
+// CHECK-LABEL: @xvdiv_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); }
+// CHECK-LABEL: @xvhaddw_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); }
+// CHECK-LABEL: @xvhaddw_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); }
+// CHECK-LABEL: @xvhaddw_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); }
+// CHECK-LABEL: @xvhaddw_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); }
+// CHECK-LABEL: @xvhaddw_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); }
+// CHECK-LABEL: @xvhaddw_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); }
+// CHECK-LABEL: @xvhsubw_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); }
+// CHECK-LABEL: @xvhsubw_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); }
+// CHECK-LABEL: @xvhsubw_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); }
+// CHECK-LABEL: @xvhsubw_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); }
+// CHECK-LABEL: @xvhsubw_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); }
+// CHECK-LABEL: @xvhsubw_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); }
+// CHECK-LABEL: @xvmod_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); }
+// CHECK-LABEL: @xvmod_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); }
+// CHECK-LABEL: @xvmod_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); }
+// CHECK-LABEL: @xvmod_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); }
+// CHECK-LABEL: @xvmod_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); }
+// CHECK-LABEL: @xvmod_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); }
+// CHECK-LABEL: @xvmod_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); }
+// CHECK-LABEL: @xvmod_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); }
+// CHECK-LABEL: @xvrepl128vei_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); }
+// CHECK-LABEL: @xvrepl128vei_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); }
+// CHECK-LABEL: @xvrepl128vei_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); }
+// CHECK-LABEL: @xvrepl128vei_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); }
+// CHECK-LABEL: @xvpickev_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); }
+// CHECK-LABEL: @xvpickev_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); }
+// CHECK-LABEL: @xvpickev_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); }
+// CHECK-LABEL: @xvpickev_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); }
+// CHECK-LABEL: @xvpickod_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); }
+// CHECK-LABEL: @xvpickod_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); }
+// CHECK-LABEL: @xvpickod_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); }
+// CHECK-LABEL: @xvpickod_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); }
+// CHECK-LABEL: @xvilvh_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); }
+// CHECK-LABEL: @xvilvh_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); }
+// CHECK-LABEL: @xvilvh_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); }
+// CHECK-LABEL: @xvilvh_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); }
+// CHECK-LABEL: @xvilvl_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); }
+// CHECK-LABEL: @xvilvl_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); }
+// CHECK-LABEL: @xvilvl_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); }
+// CHECK-LABEL: @xvilvl_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); }
+// CHECK-LABEL: @xvpackev_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); }
+// CHECK-LABEL: @xvpackev_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); }
+// CHECK-LABEL: @xvpackev_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); }
+// CHECK-LABEL: @xvpackev_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); }
+// CHECK-LABEL: @xvpackod_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); }
+// CHECK-LABEL: @xvpackod_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); }
+// CHECK-LABEL: @xvpackod_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); }
+// CHECK-LABEL: @xvpackod_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); }
+// CHECK-LABEL: @xvshuf_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); }
+// CHECK-LABEL: @xvand_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); }
+// CHECK-LABEL: @xvandi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); }
+// CHECK-LABEL: @xvor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); }
+// CHECK-LABEL: @xvori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); }
+// CHECK-LABEL: @xvnor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); }
+// CHECK-LABEL: @xvnori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); }
+// CHECK-LABEL: @xvxor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); }
+// CHECK-LABEL: @xvxori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); }
+// CHECK-LABEL: @xvbitsel_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); }
+// CHECK-LABEL: @xvbitseli_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); }
+// CHECK-LABEL: @xvshuf4i_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); }
+// CHECK-LABEL: @xvshuf4i_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); }
+// CHECK-LABEL: @xvshuf4i_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); }
+// CHECK-LABEL: @xvreplgr2vr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); }
+// CHECK-LABEL: @xvreplgr2vr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); }
+// CHECK-LABEL: @xvreplgr2vr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); }
+// CHECK-LABEL: @xvreplgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); }
+// CHECK-LABEL: @xvpcnt_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); }
+// CHECK-LABEL: @xvpcnt_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); }
+// CHECK-LABEL: @xvpcnt_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); }
+// CHECK-LABEL: @xvpcnt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); }
+// CHECK-LABEL: @xvclo_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); }
+// CHECK-LABEL: @xvclo_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); }
+// CHECK-LABEL: @xvclo_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); }
+// CHECK-LABEL: @xvclo_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); }
+// CHECK-LABEL: @xvclz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); }
+// CHECK-LABEL: @xvclz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); }
+// CHECK-LABEL: @xvclz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); }
+// CHECK-LABEL: @xvclz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); }
+// CHECK-LABEL: @xvfadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); }
+// CHECK-LABEL: @xvfadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); }
+// CHECK-LABEL: @xvfsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); }
+// CHECK-LABEL: @xvfsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); }
+// CHECK-LABEL: @xvfmul_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); }
+// CHECK-LABEL: @xvfmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); }
+// CHECK-LABEL: @xvfdiv_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); }
+// CHECK-LABEL: @xvfdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); }
+// CHECK-LABEL: @xvfcvt_h_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); }
+// CHECK-LABEL: @xvfcvt_s_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); }
+// CHECK-LABEL: @xvfmin_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); }
+// CHECK-LABEL: @xvfmin_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); }
+// CHECK-LABEL: @xvfmina_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); }
+// CHECK-LABEL: @xvfmina_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); }
+// CHECK-LABEL: @xvfmax_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); }
+// CHECK-LABEL: @xvfmax_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); }
+// CHECK-LABEL: @xvfmaxa_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); }
+// CHECK-LABEL: @xvfmaxa_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); }
+// CHECK-LABEL: @xvfclass_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); }
+// CHECK-LABEL: @xvfclass_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); }
+// CHECK-LABEL: @xvfsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); }
+// CHECK-LABEL: @xvfsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); }
+// CHECK-LABEL: @xvfrecip_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); }
+// CHECK-LABEL: @xvfrecip_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); }
+// CHECK-LABEL: @xvfrint_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); }
+// CHECK-LABEL: @xvfrint_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); }
+// CHECK-LABEL: @xvfrsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); }
+// CHECK-LABEL: @xvfrsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); }
+// CHECK-LABEL: @xvflogb_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); }
+// CHECK-LABEL: @xvflogb_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); }
+// CHECK-LABEL: @xvfcvth_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); } +// CHECK-LABEL: @xvfcvth_d_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x double> [[TMP0]] +// +v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); } +// CHECK-LABEL: @xvfcvtl_s_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x float> [[TMP0]] +// +v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); } +// CHECK-LABEL: @xvfcvtl_d_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x double> [[TMP0]] +// +v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); } +// CHECK-LABEL: @xvftint_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); } +// CHECK-LABEL: @xvftint_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); } +// CHECK-LABEL: @xvftint_wu_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); } +// CHECK-LABEL: @xvftint_lu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); } +// CHECK-LABEL: @xvftintrz_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); } +// CHECK-LABEL: @xvftintrz_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); } +// CHECK-LABEL: @xvftintrz_wu_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); } +// CHECK-LABEL: @xvftintrz_lu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); } +// CHECK-LABEL: @xvffint_s_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x float> [[TMP0]] +// +v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); } +// CHECK-LABEL: @xvffint_d_l( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x 
double> [[TMP0]] +// +v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); } +// CHECK-LABEL: @xvffint_s_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x float> [[TMP0]] +// +v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); } +// CHECK-LABEL: @xvffint_d_lu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x double> [[TMP0]] +// +v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); } +// CHECK-LABEL: @xvreplve_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); } +// CHECK-LABEL: @xvreplve_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); } +// CHECK-LABEL: @xvreplve_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); } +// CHECK-LABEL: @xvreplve_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); } +// CHECK-LABEL: @xvpermi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); } +// CHECK-LABEL: @xvandn_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); } +// CHECK-LABEL: @xvneg_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); } +// CHECK-LABEL: @xvneg_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); } +// CHECK-LABEL: @xvneg_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); } +// CHECK-LABEL: @xvneg_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); } +// CHECK-LABEL: @xvmuh_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> 
[[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); } +// CHECK-LABEL: @xvmuh_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); } +// CHECK-LABEL: @xvmuh_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); } +// CHECK-LABEL: @xvmuh_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); } +// CHECK-LABEL: @xvmuh_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); } +// CHECK-LABEL: @xvmuh_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); } +// CHECK-LABEL: @xvmuh_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); } +// CHECK-LABEL: @xvmuh_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); } +// CHECK-LABEL: @xvsllwil_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); } +// CHECK-LABEL: @xvsllwil_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); } +// CHECK-LABEL: @xvsllwil_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); } +// CHECK-LABEL: @xvsllwil_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); } +// CHECK-LABEL: @xvsllwil_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 
xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); } +// CHECK-LABEL: @xvsllwil_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); } +// CHECK-LABEL: @xvsran_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); } +// CHECK-LABEL: @xvsran_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); } +// CHECK-LABEL: @xvsran_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); } +// CHECK-LABEL: @xvssran_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); } +// CHECK-LABEL: @xvssran_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); } +// CHECK-LABEL: @xvssran_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); } +// CHECK-LABEL: @xvssran_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); } +// CHECK-LABEL: @xvssran_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); } +// CHECK-LABEL: @xvssran_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); } +// CHECK-LABEL: @xvsrarn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); } +// CHECK-LABEL: @xvsrarn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x 
i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); } +// CHECK-LABEL: @xvsrarn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); } +// CHECK-LABEL: @xvssrarn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); } +// CHECK-LABEL: @xvssrarn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); } +// CHECK-LABEL: @xvssrarn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); } +// CHECK-LABEL: @xvssrarn_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); } +// CHECK-LABEL: @xvssrarn_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); } +// CHECK-LABEL: @xvssrarn_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); } +// CHECK-LABEL: @xvsrln_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); } +// CHECK-LABEL: @xvsrln_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); } +// CHECK-LABEL: @xvsrln_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); } +// CHECK-LABEL: @xvssrln_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); } +// 
CHECK-LABEL: @xvssrln_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); } +// CHECK-LABEL: @xvssrln_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); } +// CHECK-LABEL: @xvsrlrn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); } +// CHECK-LABEL: @xvsrlrn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); } +// CHECK-LABEL: @xvsrlrn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); } +// CHECK-LABEL: @xvssrlrn_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); } +// CHECK-LABEL: @xvssrlrn_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); } +// CHECK-LABEL: @xvssrlrn_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); } +// CHECK-LABEL: @xvfrstpi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); } +// CHECK-LABEL: @xvfrstpi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); } +// CHECK-LABEL: @xvfrstp_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); } +// CHECK-LABEL: @xvfrstp_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); } +// CHECK-LABEL: @xvshuf4i_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); } +// CHECK-LABEL: @xvbsrl_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); } +// CHECK-LABEL: @xvbsll_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); } +// CHECK-LABEL: @xvextrins_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); } +// CHECK-LABEL: @xvextrins_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); } +// CHECK-LABEL: @xvextrins_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); } +// CHECK-LABEL: @xvextrins_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); } +// CHECK-LABEL: @xvmskltz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); } +// CHECK-LABEL: @xvmskltz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); } +// CHECK-LABEL: @xvmskltz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); } +// CHECK-LABEL: @xvmskltz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); } +// CHECK-LABEL: @xvsigncov_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 
x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); } +// CHECK-LABEL: @xvsigncov_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); } +// CHECK-LABEL: @xvsigncov_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); } +// CHECK-LABEL: @xvsigncov_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); } +// CHECK-LABEL: @xvfmadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x float> [[TMP0]] +// +v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); } +// CHECK-LABEL: @xvfmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x double> [[TMP0]] +// +v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); } +// CHECK-LABEL: @xvfmsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x float> [[TMP0]] +// +v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); } +// CHECK-LABEL: @xvfmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x double> [[TMP0]] +// +v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); } +// CHECK-LABEL: @xvfnmadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x float> [[TMP0]] +// +v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); } +// CHECK-LABEL: @xvfnmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x double> [[TMP0]] +// +v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); } +// CHECK-LABEL: @xvfnmsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x float> [[TMP0]] +// +v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); } +// CHECK-LABEL: @xvfnmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = 
tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x double> [[TMP0]] +// +v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); } +// CHECK-LABEL: @xvftintrne_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); } +// CHECK-LABEL: @xvftintrne_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); } +// CHECK-LABEL: @xvftintrp_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); } +// CHECK-LABEL: @xvftintrp_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); } +// CHECK-LABEL: @xvftintrm_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); } +// CHECK-LABEL: @xvftintrm_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); } +// CHECK-LABEL: @xvftint_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); } +// CHECK-LABEL: @xvffint_s_l( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x float> [[TMP0]] +// +v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); } +// CHECK-LABEL: @xvftintrz_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); } +// CHECK-LABEL: @xvftintrp_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); } +// CHECK-LABEL: @xvftintrm_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); } +// CHECK-LABEL: @xvftintrne_w_d( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); } +// CHECK-LABEL: @xvftinth_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); } +// CHECK-LABEL: @xvftintl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); } +// CHECK-LABEL: @xvffinth_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x double> [[TMP0]] +// +v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); } +// CHECK-LABEL: @xvffintl_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x double> [[TMP0]] +// +v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); } +// CHECK-LABEL: @xvftintrzh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); } +// CHECK-LABEL: @xvftintrzl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); } +// CHECK-LABEL: @xvftintrph_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); } +// CHECK-LABEL: @xvftintrpl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); } +// CHECK-LABEL: @xvftintrmh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); } +// CHECK-LABEL: @xvftintrml_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); } +// CHECK-LABEL: @xvftintrneh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); } +// CHECK-LABEL: @xvftintrnel_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> 
[[TMP0]] +// +v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); } +// CHECK-LABEL: @xvfrintrne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +// CHECK-NEXT: ret <8 x i32> [[TMP1]] +// +v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); } +// CHECK-LABEL: @xvfrintrne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +// CHECK-NEXT: ret <4 x i64> [[TMP1]] +// +v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); } +// CHECK-LABEL: @xvfrintrz_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +// CHECK-NEXT: ret <8 x i32> [[TMP1]] +// +v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); } +// CHECK-LABEL: @xvfrintrz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +// CHECK-NEXT: ret <4 x i64> [[TMP1]] +// +v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); } +// CHECK-LABEL: @xvfrintrp_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +// CHECK-NEXT: ret <8 x i32> [[TMP1]] +// +v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); } +// CHECK-LABEL: @xvfrintrp_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +// CHECK-NEXT: ret <4 x i64> [[TMP1]] +// +v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); } +// CHECK-LABEL: @xvfrintrm_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32> +// CHECK-NEXT: ret <8 x i32> [[TMP1]] +// +v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); } +// CHECK-LABEL: @xvfrintrm_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64> +// CHECK-NEXT: ret <4 x i64> [[TMP1]] +// +v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); } +// CHECK-LABEL: @xvld( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); } +// CHECK-LABEL: @xvst( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1) +// CHECK-NEXT: ret void +// +void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); } +// CHECK-LABEL: @xvstelm_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1) +// CHECK-NEXT: ret void +// +void 
xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); } +// CHECK-LABEL: @xvstelm_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1) +// CHECK-NEXT: ret void +// +void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); } +// CHECK-LABEL: @xvstelm_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1) +// CHECK-NEXT: ret void +// +void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); } +// CHECK-LABEL: @xvstelm_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1) +// CHECK-NEXT: ret void +// +void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); } +// CHECK-LABEL: @xvinsve0_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); } +// CHECK-LABEL: @xvinsve0_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); } +// CHECK-LABEL: @xvpickve_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); } +// CHECK-LABEL: @xvpickve_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); } +// CHECK-LABEL: @xvssrlrn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); } +// CHECK-LABEL: @xvssrlrn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); } +// CHECK-LABEL: @xvssrlrn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); } +// CHECK-LABEL: @xvssrln_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); } +// CHECK-LABEL: @xvssrln_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvssrln_h_w(v8i32 
_1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); } +// CHECK-LABEL: @xvssrln_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); } +// CHECK-LABEL: @xvorn_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } +// CHECK-LABEL: @xvldi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvldi() { return __lasx_xvldi(1); } +// CHECK-LABEL: @xvldx( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); } +// CHECK-LABEL: @xvstx( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1) +// CHECK-NEXT: ret void +// +void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); } +// CHECK-LABEL: @xvextl_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); } +// CHECK-LABEL: @xvinsgr2vr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); } +// CHECK-LABEL: @xvinsgr2vr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); } +// CHECK-LABEL: @xvreplve0_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); } +// CHECK-LABEL: @xvreplve0_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); } +// CHECK-LABEL: @xvreplve0_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); } +// CHECK-LABEL: @xvreplve0_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); } +// CHECK-LABEL: @xvreplve0_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); } 
+// CHECK-LABEL: @vext2xv_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); } +// CHECK-LABEL: @vext2xv_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); } +// CHECK-LABEL: @vext2xv_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); } +// CHECK-LABEL: @vext2xv_w_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); } +// CHECK-LABEL: @vext2xv_d_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); } +// CHECK-LABEL: @vext2xv_d_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); } +// CHECK-LABEL: @vext2xv_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); } +// CHECK-LABEL: @vext2xv_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); } +// CHECK-LABEL: @vext2xv_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); } +// CHECK-LABEL: @vext2xv_wu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); } +// CHECK-LABEL: @vext2xv_du_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); } +// CHECK-LABEL: @vext2xv_du_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); } +// CHECK-LABEL: @xvpermi_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return 
__lasx_xvpermi_q(_1, _2, 1); } +// CHECK-LABEL: @xvpermi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); } +// CHECK-LABEL: @xvperm_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); } +// CHECK-LABEL: @xvldrepl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); } +// CHECK-LABEL: @xvldrepl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); } +// CHECK-LABEL: @xvldrepl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); } +// CHECK-LABEL: @xvldrepl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); } +// CHECK-LABEL: @xvpickve2gr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); } +// CHECK-LABEL: @xvpickve2gr_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret i32 [[TMP0]] +// +unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); } +// CHECK-LABEL: @xvpickve2gr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret i64 [[TMP0]] +// +long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); } +// CHECK-LABEL: @xvpickve2gr_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret i64 [[TMP0]] +// +unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); } +// CHECK-LABEL: @xvaddwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); } +// CHECK-LABEL: @xvaddwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); } +// CHECK-LABEL: @xvaddwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> 
[[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); } +// CHECK-LABEL: @xvaddwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); } +// CHECK-LABEL: @xvaddwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); } +// CHECK-LABEL: @xvaddwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); } +// CHECK-LABEL: @xvaddwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); } +// CHECK-LABEL: @xvaddwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); } +// CHECK-LABEL: @xvsubwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); } +// CHECK-LABEL: @xvsubwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); } +// CHECK-LABEL: @xvsubwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); } +// CHECK-LABEL: @xvsubwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); } +// CHECK-LABEL: @xvsubwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); } +// CHECK-LABEL: @xvsubwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return 
__lasx_xvsubwev_d_wu(_1, _2); } +// CHECK-LABEL: @xvsubwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); } +// CHECK-LABEL: @xvsubwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); } +// CHECK-LABEL: @xvmulwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); } +// CHECK-LABEL: @xvmulwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); } +// CHECK-LABEL: @xvmulwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); } +// CHECK-LABEL: @xvmulwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); } +// CHECK-LABEL: @xvmulwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); } +// CHECK-LABEL: @xvmulwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); } +// CHECK-LABEL: @xvmulwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); } +// CHECK-LABEL: @xvmulwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); } +// CHECK-LABEL: @xvaddwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); } +// CHECK-LABEL: @xvaddwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x 
i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); } +// CHECK-LABEL: @xvaddwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); } +// CHECK-LABEL: @xvaddwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); } +// CHECK-LABEL: @xvaddwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); } +// CHECK-LABEL: @xvaddwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); } +// CHECK-LABEL: @xvaddwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); } +// CHECK-LABEL: @xvaddwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); } +// CHECK-LABEL: @xvsubwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); } +// CHECK-LABEL: @xvsubwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); } +// CHECK-LABEL: @xvsubwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); } +// CHECK-LABEL: @xvsubwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); } +// CHECK-LABEL: @xvsubwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 
xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); } +// CHECK-LABEL: @xvsubwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); } +// CHECK-LABEL: @xvsubwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); } +// CHECK-LABEL: @xvsubwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); } +// CHECK-LABEL: @xvmulwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); } +// CHECK-LABEL: @xvmulwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); } +// CHECK-LABEL: @xvmulwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); } +// CHECK-LABEL: @xvmulwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); } +// CHECK-LABEL: @xvmulwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); } +// CHECK-LABEL: @xvmulwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); } +// CHECK-LABEL: @xvmulwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); } +// CHECK-LABEL: @xvmulwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); } +// CHECK-LABEL: @xvaddwev_d_wu_w( +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvaddwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvaddwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvmulwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvmulwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvmulwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvaddwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvaddwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvaddwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvmulwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvmulwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvmulwod_h_bu_b( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvhaddw_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); } +// CHECK-LABEL: @xvhaddw_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); } +// CHECK-LABEL: @xvhsubw_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); } +// CHECK-LABEL: @xvhsubw_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); } +// CHECK-LABEL: @xvmaddwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return 
__lasx_xvmaddwev_d_wu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, 
_3); } +// CHECK-LABEL: @xvmaddwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); } +// CHECK-LABEL: @xvrotr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); } +// CHECK-LABEL: @xvrotr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); } +// CHECK-LABEL: @xvrotr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); } +// CHECK-LABEL: @xvrotr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); } +// CHECK-LABEL: @xvadd_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); } +// CHECK-LABEL: @xvsub_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); } +// CHECK-LABEL: @xvaddwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); } +// CHECK-LABEL: @xvaddwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); } +// CHECK-LABEL: @xvmulwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); } +// CHECK-LABEL: @xvmulwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); } +// CHECK-LABEL: @xvmskgez_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); } +// CHECK-LABEL: @xvmsknz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); } +// CHECK-LABEL: @xvexth_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); } +// CHECK-LABEL: @xvexth_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); } +// CHECK-LABEL: @xvexth_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); } +// CHECK-LABEL: @xvexth_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); } +// CHECK-LABEL: @xvexth_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); } +// CHECK-LABEL: @xvexth_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); } +// CHECK-LABEL: @xvexth_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); } +// CHECK-LABEL: @xvexth_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); } +// CHECK-LABEL: @xvrotri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); } +// CHECK-LABEL: @xvrotri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); } +// CHECK-LABEL: @xvrotri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); } +// CHECK-LABEL: @xvrotri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); } +// CHECK-LABEL: @xvextl_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); } +// CHECK-LABEL: @xvsrlni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrlni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrlni_w_d( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrlni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
<32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> 
[[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x 
i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xbnz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); } +// CHECK-LABEL: @xbnz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); } +// CHECK-LABEL: @xbnz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); } +// CHECK-LABEL: @xbnz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); } +// CHECK-LABEL: @xbnz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); } +// CHECK-LABEL: @xbz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); } +// CHECK-LABEL: @xbz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); } +// CHECK-LABEL: @xbz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); } +// CHECK-LABEL: @xbz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); } +// CHECK-LABEL: @xbz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); } +// CHECK-LABEL: @xvfcmp_caf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_caf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_ceq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_ceq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_clt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_clt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> 
[[TMP0]] +// +v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_saf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_saf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_seq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } 
+// CHECK-LABEL: @xvfcmp_seq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_slt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_slt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i64> [[TMP0]]
+//
+v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); }
+// CHECK-LABEL: @xvfcmp_sule_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i32> [[TMP0]]
+//
+v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); }
+// CHECK-LABEL: @xvfcmp_sult_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i64> [[TMP0]]
+//
+v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); }
+// CHECK-LABEL: @xvfcmp_sult_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i32> [[TMP0]]
+//
+v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); }
+// CHECK-LABEL: @xvfcmp_sun_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i64> [[TMP0]]
+//
+v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); }
+// CHECK-LABEL: @xvfcmp_sune_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT:    ret <4 x i64> [[TMP0]]
+//
+v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); }
+// CHECK-LABEL: @xvfcmp_sune_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i32> [[TMP0]]
+//
+v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); }
+// CHECK-LABEL: @xvfcmp_sun_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT:    ret <8 x i32> [[TMP0]]
+//
+v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); }
+// CHECK-LABEL: @xvpickve_d_f(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <4 x double> [[TMP0]]
+//
+v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); }
+// CHECK-LABEL: @xvpickve_w_f(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1)
+// CHECK-NEXT:    ret <8 x float> [[TMP0]]
+//
+v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); }
+// CHECK-LABEL: @xvrepli_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1)
+// CHECK-NEXT:    ret <32 x i8> [[TMP0]]
+//
+v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); }
+// CHECK-LABEL: @xvrepli_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1)
+// CHECK-NEXT:    ret <4 x i64> [[TMP0]]
+//
+v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); }
+// CHECK-LABEL: @xvrepli_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1)
+// CHECK-NEXT:    ret <16 x i16> [[TMP0]]
+//
+v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); }
+// CHECK-LABEL: @xvrepli_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1)
+// CHECK-NEXT:    ret <8 x i32> [[TMP0]]
+//
+v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); }
diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c
new file mode 100644
index 00000000000000..724484465769e0
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c
@@ -0,0 +1,1392 @@
+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s
+
+typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
+typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1)));
+typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1)));
+typedef short v16i16 __attribute__((vector_size(32), aligned(32)));
+typedef short v16i16_h __attribute__((vector_size(32), aligned(2)));
+typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2)));
+typedef int v8i32 __attribute__((vector_size(32), aligned(32)));
+typedef int v8i32_w __attribute__((vector_size(32), aligned(4)));
+typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4)));
+typedef long long v4i64 __attribute__((vector_size(32), aligned(32)));
+typedef long long v4i64_d __attribute__((vector_size(32), aligned(8)));
+typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8)));
+typedef float v8f32 __attribute__((vector_size(32), aligned(32)));
+typedef float v8f32_w __attribute__((vector_size(32), aligned(4)));
+typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
+typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
+
+v32i8 xvslli_b(v32i8 _1, int var) {
+  v32i8 res = __builtin_lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __builtin_lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __builtin_lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvslli_h(v16i16 _1, int var) {
+  v16i16 res = __builtin_lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvslli_w(v8i32 _1, int var) {
+  v8i32 res = __builtin_lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvslli_d(v4i64 _1, int var) {
+  v4i64 res = __builtin_lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvsrai_b(v32i8 _1, int var) {
+  v32i8 res = __builtin_lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __builtin_lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __builtin_lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvsrai_h(v16i16 _1, int var) {
+  v16i16 res = __builtin_lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvsrai_w(v8i32 _1, int var) {
+  v8i32 res = __builtin_lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvsrai_d(v4i64 _1, int var) {
+  v4i64 res = __builtin_lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __builtin_lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __builtin_lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvsrari_b(v32i8 _1, int var) {
+  v32i8 res = __builtin_lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __builtin_lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __builtin_lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvsrari_h(v16i16 _1, int var) {
+  v16i16 res = __builtin_lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __builtin_lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __builtin_lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvsrari_w(v8i32 _1, int var) {
+  v8i32 res = __builtin_lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __builtin_lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __builtin_lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvsrari_d(v4i64 _1, int var) {
+  v4i64 res = __builtin_lasx_xvsrari_d(_1, -1); // expected-error {{argument value
4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} + return res; +} + +v32i8 xvsrli_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} + return res; +} + +v16i16 xvsrli_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} + return res; +} + +v8i32 xvsrli_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} + return res; +} + +v4i64 xvsrli_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} + return res; +} + +v32i8 xvsrlri_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} + return res; +} + +v16i16 xvsrlri_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} + return res; +} + +v8i32 xvsrlri_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} + return res; +} + +v4i64 xvsrlri_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= 
__builtin_lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}} + return res; +} + +v32u8 xvbitclri_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} + return res; +} + +v16u16 xvbitclri_h(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} + return res; +} + +v8u32 xvbitclri_w(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}} + return res; +} + +v4u64 xvbitclri_d(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} + return res; +} + +v32u8 xvbitseti_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} + return res; +} + +v16u16 xvbitseti_h(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} + return res; +} + +v8u32 xvbitseti_w(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} + return res; +} + +v4u64 xvbitseti_d(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid 
range [0, 63]}} + res |= __builtin_lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} + return res; +} + +v32u8 xvbitrevi_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}} + return res; +} + +v16u16 xvbitrevi_h(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} + return res; +} + +v8u32 xvbitrevi_w(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} + return res; +} + +v4u64 xvbitrevi_d(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}} + return res; +} + +v32i8 xvaddi_bu(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} + return res; +} + +v16i16 xvaddi_hu(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} + return res; +} + +v8i32 xvaddi_wu(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} + return res; +} + +v4i64 xvaddi_du(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid 
range [0, 31]}} + res |= __builtin_lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} + return res; +} + +v32i8 xvsubi_bu(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} + return res; +} + +v16i16 xvsubi_hu(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} + return res; +} + +v8i32 xvsubi_wu(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} + return res; +} + +v4i64 xvsubi_du(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} + return res; +} + +v32i8 xvmaxi_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} + return res; +} + +v16i16 xvmaxi_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} + return res; +} + +v8i32 xvmaxi_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} + return res; +} + +v4i64 xvmaxi_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_d(_1, 16); // 
expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} + return res; +} + +v32u8 xvmaxi_bu(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}} + return res; +} + +v16u16 xvmaxi_hu(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}} + return res; +} + +v8u32 xvmaxi_wu(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}} + return res; +} + +v4u64 xvmaxi_du(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}} + return res; +} + +v32i8 xvmini_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}} + return res; +} + +v16i16 xvmini_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}}} + return res; +} + +v8i32 xvmini_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}} + return res; +} + +v4i64 xvmini_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 
15]}} + res |= __builtin_lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}} + return res; +} + +v32u8 xvmini_bu(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}} + return res; +} + +v16u16 xvmini_hu(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}} + return res; +} + +v8u32 xvmini_wu(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}} + return res; +} + +v4u64 xvmini_du(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}} + return res; +} + +v32i8 xvseqi_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} + return res; +} + +v16i16 xvseqi_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} + return res; +} + +v8i32 xvseqi_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} + return res; +} + +v4i64 xvseqi_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_d(_1, var); // expected-error 
{{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}} + return res; +} + +v32i8 xvslti_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}} + return res; +} + +v16i16 xvslti_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} + return res; +} + +v8i32 xvslti_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} + return res; +} + +v4i64 xvslti_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}} + return res; +} + +v32i8 xvslti_bu(v32u8 _1, int var) { + v32i8 res = __builtin_lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} + return res; +} + +v16i16 xvslti_hu(v16u16 _1, int var) { + v16i16 res = __builtin_lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} + return res; +} + +v8i32 xvslti_wu(v8u32 _1, int var) { + v8i32 res = __builtin_lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} + return res; +} + +v4i64 xvslti_du(v4u64 _1, int var) { + v4i64 res = __builtin_lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}} + 
return res; +} + +v32i8 xvslei_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}} + return res; +} + +v16i16 xvslei_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} + return res; +} + +v8i32 xvslei_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}} + return res; +} + +v4i64 xvslei_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}} + return res; +} + +v32i8 xvslei_bu(v32u8 _1, int var) { + v32i8 res = __builtin_lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} + return res; +} + +v16i16 xvslei_hu(v16u16 _1, int var) { + v16i16 res = __builtin_lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} + return res; +} + +v8i32 xvslei_wu(v8u32 _1, int var) { + v8i32 res = __builtin_lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} + return res; +} + +v4i64 xvslei_du(v4u64 _1, int var) { + v4i64 res = __builtin_lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} + return res; +} + +v32i8 xvsat_b(v32i8 _1, int var) { + v32i8 res = 
__builtin_lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}} + return res; +} + +v16i16 xvsat_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}} + return res; +} + +v8i32 xvsat_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}} + return res; +} + +v4i64 xvsat_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}} + return res; +} + +v32u8 xvsat_bu(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}} + return res; +} + +v16u16 xvsat_hu(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}} + return res; +} + +v8u32 xvsat_wu(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}} + return res; +} + +v4u64 xvsat_du(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}} + return res; +} + +v32i8 xvrepl128vei_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid 
range [0, 15]}} + res |= __builtin_lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}} + return res; +} + +v16i16 xvrepl128vei_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}} + return res; +} + +v8i32 xvrepl128vei_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}} + return res; +} + +v4i64 xvrepl128vei_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}} + return res; +} + +v32u8 xvandi_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}} + return res; +} + +v32u8 xvori_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}} + return res; +} + +v32u8 xvnori_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}} + return res; +} + +v32u8 xvxori_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}} + return res; +} + +v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) { + v32u8 res = __builtin_lasx_xvbitseli_b(_1, _2, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}} + return res; +} + +v32i8 xvshuf4i_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}} + return res; +} + +v16i16 xvshuf4i_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}} + return res; +} + +v8i32 xvshuf4i_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}} + return res; +} + +v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}} + return res; +} + +v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}} + return res; +} + +v4i64 xvpermi_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}} + return res; +} + +v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}} + return res; +} + +v16i16 
xvsllwil_h_b(v32i8 _1, int var) { + v16i16 res = __builtin_lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}} + return res; +} + +v8i32 xvsllwil_w_h(v16i16 _1, int var) { + v8i32 res = __builtin_lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}} + return res; +} + +v4i64 xvsllwil_d_w(v8i32 _1, int var) { + v4i64 res = __builtin_lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}} + return res; +} + +v16u16 xvsllwil_hu_bu(v32u8 _1, int var) { + v16u16 res = __builtin_lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} + return res; +} + +v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { + v8u32 res = __builtin_lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} + return res; +} + +v4u64 xvsllwil_du_wu(v8u32 _1, int var) { + v4u64 res = __builtin_lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} + return res; +} + +v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} + return res; +} + +v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvfrstpi_h(_1, 
_2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} + return res; +} + +v32i8 xvbsrl_v(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} + return res; +} + +v32i8 xvbsll_v(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} + return res; +} + +v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}} + return res; +} + +v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}} + return res; +} + +v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}} + return res; +} + +v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}} + return res; +} + +v32i8 xvld(void *_1, int var) { + v32i8 res = __builtin_lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __builtin_lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __builtin_lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}} + return res; +} + +void xvst(v32i8 _1, void *_2, int var) { + __builtin_lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + __builtin_lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is 
outside the valid range [-2048, 2047]}} + __builtin_lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} +} + +void xvstelm_b(v32i8 _1, void * _2, int var) { + __builtin_lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} + __builtin_lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} + __builtin_lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +} + +void xvstelm_h(v16i16 _1, void * _2, int var) { + __builtin_lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} + __builtin_lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} + __builtin_lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +} + +void xvstelm_w(v8i32 _1, void * _2, int var) { + __builtin_lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} + __builtin_lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} + __builtin_lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +} + +void xvstelm_d(v4i64 _1, void * _2, int var) { + __builtin_lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} + __builtin_lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} + __builtin_lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +} + +void xvstelm_b_idx(v32i8 _1, void * _2, int var) { + __builtin_lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + __builtin_lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + __builtin_lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +} + +void xvstelm_h_idx(v16i16 _1, void * _2, int var) { + __builtin_lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + __builtin_lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + __builtin_lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +} + +void xvstelm_w_idx(v8i32 _1, void * _2, int var) { + __builtin_lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + __builtin_lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + __builtin_lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +} + +void xvstelm_d_idx(v4i64 _1, void * _2, int var) { + __builtin_lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + __builtin_lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + __builtin_lasx_xvstelm_d(_1, _2, 8, var); // 
expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +} + +v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} + return res; +} + +v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} + return res; +} + +v8i32 xvpickve_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} + return res; +} + +v4i64 xvpickve_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} + return res; +} + +v4i64 xvldi(int var) { + v4i64 res = __builtin_lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} + res |= __builtin_lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} + res |= __builtin_lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} + return res; +} + +v8i32 xvinsgr2vr_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} + return res; +} + +v4i64 xvinsgr2vr_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} + return res; +} + +v32i8 xvldrepl_b(void *_1, int var) { + v32i8 res = __builtin_lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __builtin_lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= 
__builtin_lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} + return res; +} + +v16i16 xvldrepl_h(void *_1, int var) { + v16i16 res = __builtin_lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} + res |= __builtin_lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} + res |= __builtin_lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} + return res; +} + +v8i32 xvldrepl_w(void *_1, int var) { + v8i32 res = __builtin_lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} + res |= __builtin_lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} + res |= __builtin_lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} + return res; +} + +v4i64 xvldrepl_d(void *_1, int var) { + v4i64 res = __builtin_lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} + res |= __builtin_lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} + res |= __builtin_lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}} + return res; +} + +int xvpickve2gr_w(v8i32 _1, int var) { + int res = __builtin_lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} + return res; +} + +unsigned int xvpickve2gr_wu(v8i32 _1, int var) { + unsigned int res = __builtin_lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} + return res; +} + +long xvpickve2gr_d(v4i64 _1, int var) { + long res = __builtin_lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}} + return res; +} + +unsigned long int xvpickve2gr_du(v4i64 _1, int var) { + unsigned long int res = __builtin_lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} + return res; +} + +v32i8 xvrotri_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= 
__builtin_lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} + return res; +} + +v16i16 xvrotri_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} + return res; +} + +v8i32 xvrotri_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} + return res; +} + +v4i64 xvrotri_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} + return res; +} + +v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = 
__builtin_lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is 
outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __builtin_lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __builtin_lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __builtin_lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __builtin_lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} + return res; +} + +v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} + 
return res; +} + +v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __builtin_lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __builtin_lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __builtin_lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __builtin_lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} + return res; +} + +v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside 
the valid range [0, 63]}} + res |= __builtin_lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} + return res; +} + +v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrani_h_w(_1, _2, var); // expected-error 
{{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __builtin_lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __builtin_lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __builtin_lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __builtin_lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} + return res; +} + +v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = 
__builtin_lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __builtin_lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __builtin_lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __builtin_lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __builtin_lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}} + return res; +} + +v4f64 xvpickve_d_f(v4f64 _1, int var) { + v4f64 res = __builtin_lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res += __builtin_lasx_xvpickve_d_f(_1, 4); // 
expected-error {{argument value 4 is outside the valid range [0, 3]}} + res += __builtin_lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}} + return res; +} + +v8f32 xvpickve_w_f(v8f32 _1, int var) { + v8f32 res = __builtin_lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res += __builtin_lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res += __builtin_lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}} + return res; +} + +v32i8 xvrepli_b(int var) { + v32i8 res = __builtin_lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} + return res; +} + +v4i64 xvrepli_d(int var) { + v4i64 res = __builtin_lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} + return res; +} + +v16i16 xvrepli_h(int var) { + v16i16 res = __builtin_lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} + return res; +} + +v8i32 xvrepli_w(int var) { + v8i32 res = __builtin_lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} + return res; +} diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c new file mode 100644 index 00000000000000..0185f2004d5265 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c @@ -0,0 +1,4452 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s + +typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); +typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); +typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); +typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); +typedef short v16i16 __attribute__((vector_size(32), aligned(32))); +typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); +typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); +typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); +typedef int v8i32 __attribute__((vector_size(32), aligned(32))); +typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); +typedef unsigned int v8u32 
__attribute__((vector_size(32), aligned(32))); +typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); +typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); +typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); +typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); +typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); +typedef float v8f32 __attribute__((vector_size(32), aligned(32))); +typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); +typedef double v4f64 __attribute__((vector_size(32), aligned(32))); +typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); + +// CHECK-LABEL: @xvsll_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); } +// CHECK-LABEL: @xvsll_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); } +// CHECK-LABEL: @xvsll_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); } +// CHECK-LABEL: @xvsll_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); } +// CHECK-LABEL: @xvslli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); } +// CHECK-LABEL: @xvslli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); } +// CHECK-LABEL: @xvslli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); } +// CHECK-LABEL: @xvslli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); } +// CHECK-LABEL: @xvsra_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); } +// CHECK-LABEL: @xvsra_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16>
@llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); } +// CHECK-LABEL: @xvsra_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); } +// CHECK-LABEL: @xvsra_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); } +// CHECK-LABEL: @xvsrai_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); } +// CHECK-LABEL: @xvsrai_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); } +// CHECK-LABEL: @xvsrai_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); } +// CHECK-LABEL: @xvsrai_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); } +// CHECK-LABEL: @xvsrar_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); } +// CHECK-LABEL: @xvsrar_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); } +// CHECK-LABEL: @xvsrar_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); } +// CHECK-LABEL: @xvsrar_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); } +// CHECK-LABEL: @xvsrari_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); } +// CHECK-LABEL: @xvsrari_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret 
<16 x i16> [[TMP0]] +// +v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); } +// CHECK-LABEL: @xvsrari_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); } +// CHECK-LABEL: @xvsrari_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); } +// CHECK-LABEL: @xvsrl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); } +// CHECK-LABEL: @xvsrl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); } +// CHECK-LABEL: @xvsrl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); } +// CHECK-LABEL: @xvsrl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); } +// CHECK-LABEL: @xvsrli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); } +// CHECK-LABEL: @xvsrli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); } +// CHECK-LABEL: @xvsrli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); } +// CHECK-LABEL: @xvsrli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); } +// CHECK-LABEL: @xvsrlr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); } +// CHECK-LABEL: @xvsrlr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); } +// 
CHECK-LABEL: @xvsrlr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); } +// CHECK-LABEL: @xvsrlr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); } +// CHECK-LABEL: @xvsrlri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); } +// CHECK-LABEL: @xvsrlri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); } +// CHECK-LABEL: @xvsrlri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); } +// CHECK-LABEL: @xvsrlri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); } +// CHECK-LABEL: @xvbitclr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); } +// CHECK-LABEL: @xvbitclr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); } +// CHECK-LABEL: @xvbitclr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); } +// CHECK-LABEL: @xvbitclr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); } +// CHECK-LABEL: @xvbitclri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); } +// CHECK-LABEL: @xvbitclri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); } +// CHECK-LABEL: @xvbitclri_w( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); } +// CHECK-LABEL: @xvbitclri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); } +// CHECK-LABEL: @xvbitset_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); } +// CHECK-LABEL: @xvbitset_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); } +// CHECK-LABEL: @xvbitset_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); } +// CHECK-LABEL: @xvbitset_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); } +// CHECK-LABEL: @xvbitseti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); } +// CHECK-LABEL: @xvbitseti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); } +// CHECK-LABEL: @xvbitseti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); } +// CHECK-LABEL: @xvbitseti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvbitseti_d(v4u64 _1) { return __builtin_lasx_xvbitseti_d(_1, 1); } +// CHECK-LABEL: @xvbitrev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); } +// CHECK-LABEL: @xvbitrev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); } +// CHECK-LABEL: 
@xvbitrev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, _2); } +// CHECK-LABEL: @xvbitrev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); } +// CHECK-LABEL: @xvbitrevi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvbitrevi_b(v32u8 _1) { return __builtin_lasx_xvbitrevi_b(_1, 1); } +// CHECK-LABEL: @xvbitrevi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); } +// CHECK-LABEL: @xvbitrevi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); } +// CHECK-LABEL: @xvbitrevi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); } +// CHECK-LABEL: @xvadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); } +// CHECK-LABEL: @xvadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); } +// CHECK-LABEL: @xvadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); } +// CHECK-LABEL: @xvadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); } +// CHECK-LABEL: @xvaddi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); } +// CHECK-LABEL: @xvaddi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); } +// CHECK-LABEL: @xvaddi_wu( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddi_wu(v8i32 _1) { return __builtin_lasx_xvaddi_wu(_1, 1); } +// CHECK-LABEL: @xvaddi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); } +// CHECK-LABEL: @xvsub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); } +// CHECK-LABEL: @xvsub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); } +// CHECK-LABEL: @xvsub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); } +// CHECK-LABEL: @xvsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); } +// CHECK-LABEL: @xvsubi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); } +// CHECK-LABEL: @xvsubi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); } +// CHECK-LABEL: @xvsubi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); } +// CHECK-LABEL: @xvsubi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); } +// CHECK-LABEL: @xvmax_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } +// CHECK-LABEL: @xvmax_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } +// CHECK-LABEL: @xvmax_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1:%.*]], <8 x 
i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } +// CHECK-LABEL: @xvmax_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } +// CHECK-LABEL: @xvmaxi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } +// CHECK-LABEL: @xvmaxi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } +// CHECK-LABEL: @xvmaxi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } +// CHECK-LABEL: @xvmaxi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); } +// CHECK-LABEL: @xvmax_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } +// CHECK-LABEL: @xvmax_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } +// CHECK-LABEL: @xvmax_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); } +// CHECK-LABEL: @xvmax_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } +// CHECK-LABEL: @xvmaxi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } +// CHECK-LABEL: @xvmaxi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } +// CHECK-LABEL: @xvmaxi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmaxi_wu(v8u32 _1) { return 
__builtin_lasx_xvmaxi_wu(_1, 1); } +// CHECK-LABEL: @xvmaxi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } +// CHECK-LABEL: @xvmin_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } +// CHECK-LABEL: @xvmin_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } +// CHECK-LABEL: @xvmin_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } +// CHECK-LABEL: @xvmin_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } +// CHECK-LABEL: @xvmini_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } +// CHECK-LABEL: @xvmini_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } +// CHECK-LABEL: @xvmini_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } +// CHECK-LABEL: @xvmini_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } +// CHECK-LABEL: @xvmin_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } +// CHECK-LABEL: @xvmin_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); } +// CHECK-LABEL: @xvmin_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } +// CHECK-LABEL: @xvmin_du( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); } +// CHECK-LABEL: @xvmini_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); } +// CHECK-LABEL: @xvmini_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); } +// CHECK-LABEL: @xvmini_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } +// CHECK-LABEL: @xvmini_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } +// CHECK-LABEL: @xvseq_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } +// CHECK-LABEL: @xvseq_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } +// CHECK-LABEL: @xvseq_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } +// CHECK-LABEL: @xvseq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); } +// CHECK-LABEL: @xvseqi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); } +// CHECK-LABEL: @xvseqi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); } +// CHECK-LABEL: @xvseqi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); } +// CHECK-LABEL: @xvseqi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x 
i64> [[TMP0]] +// +v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); } +// CHECK-LABEL: @xvslt_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); } +// CHECK-LABEL: @xvslt_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); } +// CHECK-LABEL: @xvslt_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); } +// CHECK-LABEL: @xvslt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); } +// CHECK-LABEL: @xvslti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); } +// CHECK-LABEL: @xvslti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); } +// CHECK-LABEL: @xvslti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); } +// CHECK-LABEL: @xvslti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); } +// CHECK-LABEL: @xvslt_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); } +// CHECK-LABEL: @xvslt_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); } +// CHECK-LABEL: @xvslt_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); } +// CHECK-LABEL: @xvslt_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return 
__builtin_lasx_xvslt_du(_1, _2); } +// CHECK-LABEL: @xvslti_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); } +// CHECK-LABEL: @xvslti_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); } +// CHECK-LABEL: @xvslti_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); } +// CHECK-LABEL: @xvslti_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); } +// CHECK-LABEL: @xvsle_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); } +// CHECK-LABEL: @xvsle_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); } +// CHECK-LABEL: @xvsle_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); } +// CHECK-LABEL: @xvsle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); } +// CHECK-LABEL: @xvslei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); } +// CHECK-LABEL: @xvslei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); } +// CHECK-LABEL: @xvslei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); } +// CHECK-LABEL: @xvslei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); } +// CHECK-LABEL: @xvsle_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); } +// CHECK-LABEL: @xvsle_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); } +// CHECK-LABEL: @xvsle_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); } +// CHECK-LABEL: @xvsle_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); } +// CHECK-LABEL: @xvslei_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); } +// CHECK-LABEL: @xvslei_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); } +// CHECK-LABEL: @xvslei_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); } +// CHECK-LABEL: @xvslei_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); } +// CHECK-LABEL: @xvsat_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); } +// CHECK-LABEL: @xvsat_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); } +// CHECK-LABEL: @xvsat_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); } +// CHECK-LABEL: @xvsat_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); } +// CHECK-LABEL: @xvsat_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvsat_bu(v32u8 _1) { return 
__builtin_lasx_xvsat_bu(_1, 1); } +// CHECK-LABEL: @xvsat_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); } +// CHECK-LABEL: @xvsat_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); } +// CHECK-LABEL: @xvsat_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); } +// CHECK-LABEL: @xvadda_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); } +// CHECK-LABEL: @xvadda_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); } +// CHECK-LABEL: @xvadda_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); } +// CHECK-LABEL: @xvadda_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); } +// CHECK-LABEL: @xvsadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); } +// CHECK-LABEL: @xvsadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); } +// CHECK-LABEL: @xvsadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); } +// CHECK-LABEL: @xvsadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); } +// CHECK-LABEL: @xvsadd_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return 
__builtin_lasx_xvsadd_bu(_1, _2); } +// CHECK-LABEL: @xvsadd_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); } +// CHECK-LABEL: @xvsadd_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); } +// CHECK-LABEL: @xvsadd_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); } +// CHECK-LABEL: @xvavg_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); } +// CHECK-LABEL: @xvavg_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); } +// CHECK-LABEL: @xvavg_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); } +// CHECK-LABEL: @xvavg_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); } +// CHECK-LABEL: @xvavg_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); } +// CHECK-LABEL: @xvavg_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); } +// CHECK-LABEL: @xvavg_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); } +// CHECK-LABEL: @xvavg_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); } +// CHECK-LABEL: @xvavgr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> 
[[TMP0]] +// +v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); } +// CHECK-LABEL: @xvavgr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); } +// CHECK-LABEL: @xvavgr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); } +// CHECK-LABEL: @xvavgr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); } +// CHECK-LABEL: @xvavgr_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); } +// CHECK-LABEL: @xvavgr_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); } +// CHECK-LABEL: @xvavgr_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); } +// CHECK-LABEL: @xvavgr_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); } +// CHECK-LABEL: @xvssub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); } +// CHECK-LABEL: @xvssub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); } +// CHECK-LABEL: @xvssub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); } +// CHECK-LABEL: @xvssub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); } +// CHECK-LABEL: @xvssub_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); } +// CHECK-LABEL: @xvssub_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); } +// CHECK-LABEL: @xvssub_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); } +// CHECK-LABEL: @xvssub_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); } +// CHECK-LABEL: @xvabsd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); } +// CHECK-LABEL: @xvabsd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); } +// CHECK-LABEL: @xvabsd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); } +// CHECK-LABEL: @xvabsd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); } +// CHECK-LABEL: @xvabsd_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); } +// CHECK-LABEL: @xvabsd_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); } +// CHECK-LABEL: @xvabsd_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); } +// CHECK-LABEL: @xvabsd_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); } +// CHECK-LABEL: 
@xvmul_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); } +// CHECK-LABEL: @xvmul_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); } +// CHECK-LABEL: @xvmul_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); } +// CHECK-LABEL: @xvmul_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); } +// CHECK-LABEL: @xvmadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); } +// CHECK-LABEL: @xvmadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); } +// CHECK-LABEL: @xvmadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); } +// CHECK-LABEL: @xvmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); } +// CHECK-LABEL: @xvmsub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); } +// CHECK-LABEL: @xvmsub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); } +// CHECK-LABEL: @xvmsub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); } +// CHECK-LABEL: 
@xvmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); } +// CHECK-LABEL: @xvdiv_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); } +// CHECK-LABEL: @xvdiv_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); } +// CHECK-LABEL: @xvdiv_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); } +// CHECK-LABEL: @xvdiv_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); } +// CHECK-LABEL: @xvdiv_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); } +// CHECK-LABEL: @xvdiv_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); } +// CHECK-LABEL: @xvdiv_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); } +// CHECK-LABEL: @xvdiv_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); } +// CHECK-LABEL: @xvhaddw_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); } +// CHECK-LABEL: @xvhaddw_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); } +// CHECK-LABEL: @xvhaddw_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> 
[[TMP0]] +// +v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); } +// CHECK-LABEL: @xvhaddw_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); } +// CHECK-LABEL: @xvhaddw_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); } +// CHECK-LABEL: @xvhaddw_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); } +// CHECK-LABEL: @xvhsubw_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); } +// CHECK-LABEL: @xvhsubw_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); } +// CHECK-LABEL: @xvhsubw_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); } +// CHECK-LABEL: @xvhsubw_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); } +// CHECK-LABEL: @xvhsubw_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); } +// CHECK-LABEL: @xvhsubw_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); } +// CHECK-LABEL: @xvmod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); } +// CHECK-LABEL: @xvmod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return 
__builtin_lasx_xvmod_h(_1, _2); } +// CHECK-LABEL: @xvmod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); } +// CHECK-LABEL: @xvmod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); } +// CHECK-LABEL: @xvmod_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); } +// CHECK-LABEL: @xvmod_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); } +// CHECK-LABEL: @xvmod_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); } +// CHECK-LABEL: @xvmod_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); } +// CHECK-LABEL: @xvrepl128vei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); } +// CHECK-LABEL: @xvrepl128vei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); } +// CHECK-LABEL: @xvrepl128vei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); } +// CHECK-LABEL: @xvrepl128vei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); } +// CHECK-LABEL: @xvpickev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); } +// CHECK-LABEL: @xvpickev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// 
+v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); } +// CHECK-LABEL: @xvpickev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); } +// CHECK-LABEL: @xvpickev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); } +// CHECK-LABEL: @xvpickod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); } +// CHECK-LABEL: @xvpickod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); } +// CHECK-LABEL: @xvpickod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); } +// CHECK-LABEL: @xvpickod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); } +// CHECK-LABEL: @xvilvh_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); } +// CHECK-LABEL: @xvilvh_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); } +// CHECK-LABEL: @xvilvh_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); } +// CHECK-LABEL: @xvilvh_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); } +// CHECK-LABEL: @xvilvl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); } +// CHECK-LABEL: @xvilvl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); } +// CHECK-LABEL: @xvilvl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); } +// CHECK-LABEL: @xvilvl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); } +// CHECK-LABEL: @xvpackev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); } +// CHECK-LABEL: @xvpackev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); } +// CHECK-LABEL: @xvpackev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); } +// CHECK-LABEL: @xvpackev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); } +// CHECK-LABEL: @xvpackod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); } +// CHECK-LABEL: @xvpackod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); } +// CHECK-LABEL: @xvpackod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); } +// CHECK-LABEL: @xvpackod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); } +// CHECK-LABEL: @xvshuf_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 
_3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); } +// CHECK-LABEL: @xvshuf_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); } +// CHECK-LABEL: @xvshuf_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); } +// CHECK-LABEL: @xvshuf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); } +// CHECK-LABEL: @xvand_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); } +// CHECK-LABEL: @xvandi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); } +// CHECK-LABEL: @xvor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); } +// CHECK-LABEL: @xvori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); } +// CHECK-LABEL: @xvnor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); } +// CHECK-LABEL: @xvnori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); } +// CHECK-LABEL: @xvxor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); } +// CHECK-LABEL: @xvxori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); } +// CHECK-LABEL: @xvbitsel_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret 
<32 x i8> [[TMP0]] +// +v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); } +// CHECK-LABEL: @xvbitseli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); } +// CHECK-LABEL: @xvshuf4i_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); } +// CHECK-LABEL: @xvshuf4i_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); } +// CHECK-LABEL: @xvshuf4i_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); } +// CHECK-LABEL: @xvreplgr2vr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); } +// CHECK-LABEL: @xvreplgr2vr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); } +// CHECK-LABEL: @xvreplgr2vr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); } +// CHECK-LABEL: @xvreplgr2vr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); } +// CHECK-LABEL: @xvpcnt_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); } +// CHECK-LABEL: @xvpcnt_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); } +// CHECK-LABEL: @xvpcnt_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); } +// CHECK-LABEL: @xvpcnt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); } +// CHECK-LABEL: @xvclo_b( +// CHECK-NEXT: 
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); }
+// CHECK-LABEL: @xvclo_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); }
+// CHECK-LABEL: @xvclo_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); }
+// CHECK-LABEL: @xvclo_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); }
+// CHECK-LABEL: @xvclz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); }
+// CHECK-LABEL: @xvclz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); }
+// CHECK-LABEL: @xvclz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); }
+// CHECK-LABEL: @xvclz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); }
+// CHECK-LABEL: @xvfadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); }
+// CHECK-LABEL: @xvfadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); }
+// CHECK-LABEL: @xvfsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); }
+// CHECK-LABEL: @xvfsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); }
+// CHECK-LABEL: @xvfmul_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); }
+// CHECK-LABEL: @xvfmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); }
+// CHECK-LABEL: @xvfdiv_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); }
+// CHECK-LABEL: @xvfdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); }
+// CHECK-LABEL: @xvfcvt_h_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); }
+// CHECK-LABEL: @xvfcvt_s_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); }
+// CHECK-LABEL: @xvfmin_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); }
+// CHECK-LABEL: @xvfmin_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); }
+// CHECK-LABEL: @xvfmina_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); }
+// CHECK-LABEL: @xvfmina_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); }
+// CHECK-LABEL: @xvfmax_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); }
+// CHECK-LABEL: @xvfmax_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); }
+// CHECK-LABEL: @xvfmaxa_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); }
+// CHECK-LABEL: @xvfmaxa_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); }
+// CHECK-LABEL: @xvfclass_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); }
+// CHECK-LABEL: @xvfclass_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); }
+// CHECK-LABEL: @xvfsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); }
+// CHECK-LABEL: @xvfsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); }
+// CHECK-LABEL: @xvfrecip_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); }
+// CHECK-LABEL: @xvfrecip_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); }
+// CHECK-LABEL: @xvfrint_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); }
+// CHECK-LABEL: @xvfrint_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); }
+// CHECK-LABEL: @xvfrsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); }
+// CHECK-LABEL: @xvfrsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); }
+// CHECK-LABEL: @xvflogb_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); }
+// CHECK-LABEL: @xvflogb_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); }
+// CHECK-LABEL: @xvfcvth_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); }
+// CHECK-LABEL: @xvfcvth_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); }
+// CHECK-LABEL: @xvfcvtl_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); }
+// CHECK-LABEL: @xvfcvtl_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); }
+// CHECK-LABEL: @xvftint_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); }
+// CHECK-LABEL: @xvftint_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); }
+// CHECK-LABEL: @xvftint_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); }
+// CHECK-LABEL: @xvftint_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); }
+// CHECK-LABEL: @xvftintrz_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); }
+// CHECK-LABEL: @xvftintrz_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); }
+// CHECK-LABEL: @xvftintrz_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); }
+// CHECK-LABEL: @xvftintrz_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); }
+// CHECK-LABEL: @xvffint_s_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); }
+// CHECK-LABEL: @xvffint_d_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); }
+// CHECK-LABEL: @xvffint_s_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); }
+// CHECK-LABEL: @xvffint_d_lu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); }
+// CHECK-LABEL: @xvreplve_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1:%.*]], i32 [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); }
+// CHECK-LABEL: @xvreplve_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1:%.*]], i32 [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); }
+// CHECK-LABEL: @xvreplve_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1:%.*]], i32 [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); }
+// CHECK-LABEL: @xvreplve_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1:%.*]], i32 [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); }
+// CHECK-LABEL: @xvpermi_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); }
+// CHECK-LABEL: @xvandn_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); }
+// CHECK-LABEL: @xvneg_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); }
+// CHECK-LABEL: @xvneg_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); }
+// CHECK-LABEL: @xvneg_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); }
+// CHECK-LABEL: @xvneg_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); }
+// CHECK-LABEL: @xvmuh_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); }
+// CHECK-LABEL: @xvmuh_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); }
+// CHECK-LABEL: @xvmuh_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); }
+// CHECK-LABEL: @xvmuh_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); }
+// CHECK-LABEL: @xvmuh_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); }
+// CHECK-LABEL: @xvmuh_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); }
+// CHECK-LABEL: @xvmuh_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); }
+// CHECK-LABEL: @xvmuh_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); }
+// CHECK-LABEL: @xvsllwil_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); }
+// CHECK-LABEL: @xvsllwil_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); }
+// CHECK-LABEL: @xvsllwil_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); }
+// CHECK-LABEL: @xvsllwil_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvsllwil_hu_bu(v32u8 _1) { return __builtin_lasx_xvsllwil_hu_bu(_1, 1); }
+// CHECK-LABEL: @xvsllwil_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); }
+// CHECK-LABEL: @xvsllwil_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); }
+// CHECK-LABEL: @xvsran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); }
+// CHECK-LABEL: @xvsran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); }
+// CHECK-LABEL: @xvsran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); }
+// CHECK-LABEL: @xvssran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); }
+// CHECK-LABEL: @xvssran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); }
+// CHECK-LABEL: @xvssran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); }
+// CHECK-LABEL: @xvssran_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssran_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssran_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); }
+// CHECK-LABEL: @xvsrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); }
+// CHECK-LABEL: @xvsrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); }
+// CHECK-LABEL: @xvsrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); }
+// CHECK-LABEL: @xvssrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); }
+// CHECK-LABEL: @xvssrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrarn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssrarn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssrarn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); }
+// CHECK-LABEL: @xvsrln_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); }
+// CHECK-LABEL: @xvsrln_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); }
+// CHECK-LABEL: @xvsrln_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrln_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssrln_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssrln_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); }
+// CHECK-LABEL: @xvsrlrn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); }
+// CHECK-LABEL: @xvsrlrn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); }
+// CHECK-LABEL: @xvsrlrn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); }
+// CHECK-LABEL: @xvfrstpi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); }
+// CHECK-LABEL: @xvfrstpi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); }
+// CHECK-LABEL: @xvfrstp_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); }
+// CHECK-LABEL: @xvfrstp_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf4i_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); }
+// CHECK-LABEL: @xvbsrl_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); }
+// CHECK-LABEL: @xvbsll_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); }
+// CHECK-LABEL: @xvextrins_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); }
+// CHECK-LABEL: @xvextrins_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); }
+// CHECK-LABEL: @xvextrins_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); }
+// CHECK-LABEL: @xvextrins_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); }
+// CHECK-LABEL: @xvmskltz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); }
+// CHECK-LABEL: @xvmskltz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); }
+// CHECK-LABEL: @xvmskltz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); }
+// CHECK-LABEL: @xvmskltz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); }
+// CHECK-LABEL: @xvsigncov_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); }
+// CHECK-LABEL: @xvsigncov_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); }
+// CHECK-LABEL: @xvsigncov_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); }
+// CHECK-LABEL: @xvsigncov_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); }
+// CHECK-LABEL: @xvfmadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); }
+// CHECK-LABEL: @xvfmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); }
+// CHECK-LABEL: @xvfmsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); }
+// CHECK-LABEL: @xvfmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); }
+// CHECK-LABEL: @xvfnmadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); }
+// CHECK-LABEL: @xvfnmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, _2, _3); }
+// CHECK-LABEL: @xvfnmsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]], <8 x float> [[_3:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); }
+// CHECK-LABEL: @xvfnmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]], <4 x double> [[_3:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); }
+// CHECK-LABEL: @xvftintrne_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); }
+// CHECK-LABEL: @xvftintrne_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); }
+// CHECK-LABEL: @xvftintrp_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); }
+// CHECK-LABEL: @xvftintrp_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); }
+// CHECK-LABEL: @xvftintrm_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); }
+// CHECK-LABEL: @xvftintrm_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); }
+// CHECK-LABEL: @xvftint_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); }
+// CHECK-LABEL: @xvffint_s_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x float> [[TMP0]]
+//
+v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); }
+// CHECK-LABEL: @xvftintrz_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); }
+// CHECK-LABEL: @xvftintrp_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); }
+// CHECK-LABEL: @xvftintrm_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); }
+// CHECK-LABEL: @xvftintrne_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); }
+// CHECK-LABEL: @xvftinth_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); }
+// CHECK-LABEL: @xvftintl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); }
+// CHECK-LABEL: @xvffinth_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); }
+// CHECK-LABEL: @xvffintl_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x double> [[TMP0]]
+//
+v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); }
+// CHECK-LABEL: @xvftintrzh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); }
+// CHECK-LABEL: @xvftintrzl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); }
+// CHECK-LABEL: @xvftintrph_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); }
+// CHECK-LABEL: @xvftintrpl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); }
+// CHECK-LABEL: @xvftintrmh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); }
+// CHECK-LABEL: @xvftintrml_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); }
+// CHECK-LABEL: @xvftintrneh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); }
+// CHECK-LABEL: @xvftintrnel_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); }
+// CHECK-LABEL: @xvfrintrne_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32>
+// CHECK-NEXT: ret <8 x i32> [[TMP1]]
+//
+v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); }
+// CHECK-LABEL: @xvfrintrne_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64>
+// CHECK-NEXT: ret <4 x i64> [[TMP1]]
+//
+v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); }
+// CHECK-LABEL: @xvfrintrz_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32>
+// CHECK-NEXT: ret <8 x i32> [[TMP1]]
+//
+v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); }
+// CHECK-LABEL: @xvfrintrz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64>
+// CHECK-NEXT: ret <4 x i64> [[TMP1]]
+//
+v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); }
+// CHECK-LABEL: @xvfrintrp_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32>
+// CHECK-NEXT: ret <8 x i32> [[TMP1]]
+//
+v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); }
+// CHECK-LABEL: @xvfrintrp_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64>
+// CHECK-NEXT: ret <4 x i64> [[TMP1]]
+//
+v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); }
+// CHECK-LABEL: @xvfrintrm_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[TMP0]] to <8 x i32>
+// CHECK-NEXT: ret <8 x i32> [[TMP1]]
+//
+v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); }
+// CHECK-LABEL: @xvfrintrm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[TMP0]] to <4 x i64>
+// CHECK-NEXT: ret <4 x i64> [[TMP1]]
+//
+v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); }
+// CHECK-LABEL: @xvld(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); }
+// CHECK-LABEL: @xvst(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret void
+//
+void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); }
+// CHECK-LABEL: @xvstelm_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret void
+//
+void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); }
+// CHECK-LABEL: @xvstelm_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1:%.*]], ptr [[_2:%.*]], i32 2, i32 1)
+// CHECK-NEXT: ret void
+//
+void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); }
+// CHECK-LABEL: @xvstelm_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1:%.*]], ptr [[_2:%.*]], i32 4, i32 1)
+// CHECK-NEXT: ret void
+//
+void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); }
+// CHECK-LABEL: @xvstelm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1:%.*]], ptr [[_2:%.*]], i32 8, i32 1)
+// CHECK-NEXT: ret void
+//
+void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); }
+// CHECK-LABEL: @xvinsve0_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); }
+// CHECK-LABEL: @xvinsve0_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); }
+// CHECK-LABEL: @xvpickve_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); }
+// CHECK-LABEL: @xvpickve_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); }
+// CHECK-LABEL: @xvssrlrn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrln_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); }
+// CHECK-LABEL: @xvssrln_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); }
+// CHECK-LABEL: @xvssrln_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); }
+// CHECK-LABEL: @xvorn_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); }
+// CHECK-LABEL: @xvldi(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvldi() { return __builtin_lasx_xvldi(1); }
+// CHECK-LABEL: @xvldx(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1)
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); }
+// CHECK-LABEL: @xvstx(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1:%.*]], ptr [[_2:%.*]], i64 1)
+// CHECK-NEXT: ret void
+//
+void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); }
+// CHECK-LABEL: @xvextl_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); }
+// CHECK-LABEL: @xvinsgr2vr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); }
+// CHECK-LABEL: @xvinsgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1:%.*]], i64 1, i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvinsgr2vr_d(v4i64 _1) { return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); }
+// CHECK-LABEL: @xvreplve0_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); }
+// CHECK-LABEL: @xvreplve0_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); }
+// CHECK-LABEL: @xvreplve0_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); }
+// CHECK-LABEL: @xvreplve0_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); }
+// CHECK-LABEL: @xvreplve0_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+//
+v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); }
+// CHECK-LABEL: @vext2xv_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); }
+// CHECK-LABEL: @vext2xv_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); }
+// CHECK-LABEL: @vext2xv_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); }
+// CHECK-LABEL: @vext2xv_w_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); }
+// CHECK-LABEL: @vext2xv_d_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); }
+// CHECK-LABEL: @vext2xv_d_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1:%.*]])
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); }
+// CHECK-LABEL: @vext2xv_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]])
call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); } +// CHECK-LABEL: @vext2xv_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); } +// CHECK-LABEL: @vext2xv_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); } +// CHECK-LABEL: @vext2xv_wu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); } +// CHECK-LABEL: @vext2xv_du_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); } +// CHECK-LABEL: @vext2xv_du_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); } +// CHECK-LABEL: @xvpermi_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); } +// CHECK-LABEL: @xvpermi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); } +// CHECK-LABEL: @xvperm_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); } +// CHECK-LABEL: @xvldrepl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); } +// CHECK-LABEL: @xvldrepl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); } +// CHECK-LABEL: @xvldrepl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); } +// CHECK-LABEL: @xvldrepl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvldrepl_d(void 
*_1) { return __builtin_lasx_xvldrepl_d(_1, 8); } +// CHECK-LABEL: @xvpickve2gr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); } +// CHECK-LABEL: @xvpickve2gr_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret i32 [[TMP0]] +// +unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); } +// CHECK-LABEL: @xvpickve2gr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret i64 [[TMP0]] +// +long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); } +// CHECK-LABEL: @xvpickve2gr_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret i64 [[TMP0]] +// +unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); } +// CHECK-LABEL: @xvaddwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); } +// CHECK-LABEL: @xvaddwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); } +// CHECK-LABEL: @xvaddwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); } +// CHECK-LABEL: @xvaddwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); } +// CHECK-LABEL: @xvaddwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); } +// CHECK-LABEL: @xvaddwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); } +// CHECK-LABEL: @xvaddwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); } +// CHECK-LABEL: @xvaddwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); } +// CHECK-LABEL: @xvsubwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); } +// CHECK-LABEL: @xvsubwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); } +// CHECK-LABEL: @xvsubwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); } +// CHECK-LABEL: @xvsubwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); } +// CHECK-LABEL: @xvsubwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); } +// CHECK-LABEL: @xvsubwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); } +// CHECK-LABEL: @xvsubwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); } +// CHECK-LABEL: @xvsubwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); } +// CHECK-LABEL: @xvmulwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); } +// CHECK-LABEL: @xvmulwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); } +// CHECK-LABEL: @xvmulwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> 
[[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); } +// CHECK-LABEL: @xvmulwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); } +// CHECK-LABEL: @xvmulwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); } +// CHECK-LABEL: @xvmulwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); } +// CHECK-LABEL: @xvmulwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); } +// CHECK-LABEL: @xvmulwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); } +// CHECK-LABEL: @xvaddwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); } +// CHECK-LABEL: @xvaddwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); } +// CHECK-LABEL: @xvaddwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); } +// CHECK-LABEL: @xvaddwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); } +// CHECK-LABEL: @xvaddwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); } +// CHECK-LABEL: @xvaddwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// 
CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); } +// CHECK-LABEL: @xvaddwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); } +// CHECK-LABEL: @xvaddwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); } +// CHECK-LABEL: @xvsubwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); } +// CHECK-LABEL: @xvsubwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); } +// CHECK-LABEL: @xvsubwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); } +// CHECK-LABEL: @xvsubwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); } +// CHECK-LABEL: @xvsubwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); } +// CHECK-LABEL: @xvsubwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); } +// CHECK-LABEL: @xvsubwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); } +// CHECK-LABEL: @xvsubwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, _2); } +// CHECK-LABEL: @xvmulwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// 
+v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); } +// CHECK-LABEL: @xvmulwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); } +// CHECK-LABEL: @xvmulwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); } +// CHECK-LABEL: @xvmulwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); } +// CHECK-LABEL: @xvmulwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); } +// CHECK-LABEL: @xvmulwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); } +// CHECK-LABEL: @xvmulwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); } +// CHECK-LABEL: @xvmulwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); } +// CHECK-LABEL: @xvaddwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvaddwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvaddwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvmulwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 
xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvmulwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvmulwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvaddwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvaddwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvaddwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvmulwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvmulwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvmulwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvhaddw_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); } +// CHECK-LABEL: @xvhaddw_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); } +// CHECK-LABEL: @xvhsubw_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: 
ret <4 x i64> [[TMP0]] +// +v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); } +// CHECK-LABEL: @xvhsubw_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); } +// CHECK-LABEL: @xvmaddwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] 
+// +v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], 
<16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], <4 x i64> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1:%.*]], <8 x i32> [[_2:%.*]], <8 x i32> [[_3:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1:%.*]], <16 x i16> [[_2:%.*]], <16 x i16> [[_3:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1:%.*]], <32 x i8> [[_2:%.*]], <32 x i8> [[_3:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); } +// CHECK-LABEL: @xvrotr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); } +// CHECK-LABEL: @xvrotr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); } +// CHECK-LABEL: @xvrotr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); } +// CHECK-LABEL: @xvrotr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); } +// CHECK-LABEL: @xvadd_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); } +// CHECK-LABEL: @xvsub_q( 
+// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); } +// CHECK-LABEL: @xvaddwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); } +// CHECK-LABEL: @xvaddwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); } +// CHECK-LABEL: @xvmulwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); } +// CHECK-LABEL: @xvmulwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); } +// CHECK-LABEL: @xvmskgez_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); } +// CHECK-LABEL: @xvmsknz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); } +// CHECK-LABEL: @xvexth_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); } +// CHECK-LABEL: @xvexth_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); } +// CHECK-LABEL: @xvexth_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); } +// CHECK-LABEL: @xvexth_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); } +// CHECK-LABEL: @xvexth_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); } +// CHECK-LABEL: @xvexth_wu_hu( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); } +// CHECK-LABEL: @xvexth_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); } +// CHECK-LABEL: @xvexth_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); } +// CHECK-LABEL: @xvrotri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); } +// CHECK-LABEL: @xvrotri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); } +// CHECK-LABEL: @xvrotri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); } +// CHECK-LABEL: @xvrotri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); } +// CHECK-LABEL: @xvextl_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); } +// CHECK-LABEL: @xvsrlni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrlni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrlni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrlni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 
x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_du_q( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { 
return __builtin_lasx_xvsrani_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 
xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <16 x i16> [[TMP0]] +// +v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]], i32 1) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xbnz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); } +// CHECK-LABEL: @xbnz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); } +// CHECK-LABEL: @xbnz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); } +// CHECK-LABEL: @xbnz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); } +// CHECK-LABEL: @xbnz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); } +// CHECK-LABEL: @xbz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); } +// CHECK-LABEL: @xbz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); } +// CHECK-LABEL: @xbz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); } +// CHECK-LABEL: @xbz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); } +// CHECK-LABEL: @xbz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1:%.*]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); } +// CHECK-LABEL: @xvfcmp_caf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_caf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_ceq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return 
__builtin_lasx_xvfcmp_ceq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_ceq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_clt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_clt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cor_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return 
__builtin_lasx_xvfcmp_cueq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_saf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_saf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_seq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return 
__builtin_lasx_xvfcmp_seq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_seq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_slt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_slt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return 
__builtin_lasx_xvfcmp_sueq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sult_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1:%.*]], <4 x double> [[_2:%.*]]) +// CHECK-NEXT: ret <4 x i64> [[TMP0]] +// +v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1:%.*]], <8 x float> [[_2:%.*]]) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } +// CHECK-LABEL: @xvpickve_d_f( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <4 x double> [[TMP0]] +// +v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } +// CHECK-LABEL: @xvpickve_w_f( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1:%.*]], i32 1) +// CHECK-NEXT: ret <8 x float> [[TMP0]] +// +v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } +// CHECK-LABEL: @xvrepli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) +// CHECK-NEXT: ret <32 x i8> [[TMP0]] +// +v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } +// CHECK-LABEL: @xvrepli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
<4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1)
+// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+//
+v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); }
+// CHECK-LABEL: @xvrepli_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1)
+// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+//
+v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); }
+// CHECK-LABEL: @xvrepli_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1)
+// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+//
+v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); }

From 1aaac0129cb1a5c7de74265bed7f6c940a625642 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Tue, 31 Oct 2023 07:54:27 +0000
Subject: [PATCH 126/144] [gn build] Port a4005e729c8d

---
 llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
index a70ff97299aa0a..bb503ddf571e78 100644
--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
@@ -201,6 +201,7 @@ copy("Headers") {
     "iso646.h",
     "keylockerintrin.h",
     "larchintrin.h",
+    "lasxintrin.h",
     "limits.h",
     "llvm_libc_wrappers/assert.h",
     "llvm_libc_wrappers/ctype.h",

From f3fd1ecd5759cfa7f368a2e0a876ca7cddcc8b2b Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Tue, 31 Oct 2023 07:54:27 +0000
Subject: [PATCH 127/144] [gn build] Port d6bfa3341181

---
 llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
index bb503ddf571e78..20135221fdacde 100644
--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
@@ -210,6 +210,7 @@ copy("Headers") {
     "llvm_libc_wrappers/stdlib.h",
     "llvm_libc_wrappers/string.h",
     "llvm_libc_wrappers/time.h",
+    "lsxintrin.h",
     "lwpintrin.h",
     "lzcntintrin.h",
     "mm3dnow.h",

From d473e2c124df5adfdb513de2044424ebbfc72bf7 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Tue, 31 Oct 2023 09:05:12 +0100
Subject: [PATCH 128/144] [RISCV][MCA] Remove unnecessary -debug flag from test (NFC)

This test doesn't appear to actually use -debug output, and the flag
makes the test fail on non-assert builds.
---
 llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-x0.s | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-x0.s b/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-x0.s
index 8b52d0ece63593..41a6935873ad3c 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-x0.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFive7/strided-load-x0.s
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
-# RUN: llvm-mca -debug -mtriple=riscv64 -mcpu=sifive-x280 -iterations=1 < %s | FileCheck %s
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -iterations=1 < %s | FileCheck %s
 
 vsetvli zero, zero, e32, m1, tu, mu
 

From d56eb823fc772bcfea266275da98cfe607b05b7f Mon Sep 17 00:00:00 2001
From: Paulo Matos
Date: Tue, 31 Oct 2023 09:08:35 +0100
Subject: [PATCH 129/144] [NFC] Remove TODO regarding opaque ptr conversion

---
 llvm/include/llvm/IR/Type.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index c38078cc6087ef..63414cac476079 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -482,9 +482,6 @@ class Type {
   //===--------------------------------------------------------------------===//
   // Convenience methods for getting pointer types.
   //
-
-  // TODO: After opaque pointer transition this can be replaced by simply
-  // calling PointerType::get(C, AS).
   static PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
 
   static Type *getWasm_ExternrefTy(LLVMContext &C);

From dbd4a0dd38eb03df4f7d55c780b3dd6cb15a270d Mon Sep 17 00:00:00 2001
From: Christian Ulmann
Date: Tue, 31 Oct 2023 09:22:44 +0100
Subject: [PATCH 130/144] [MLIR][GPUCommon] Remove typed pointer support (#70735)

This commit removes the GPUCommon's lowering support for typed pointers.
Typed pointers have been deprecated for a while now and it's planned to
soon remove them from the LLVM dialect.
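To make the difference concrete, here is a hand-written sketch mirroring the
deleted typed-pointers.mlir test below (the type annotations are illustrative
and not taken from the test). With typed pointers, the lowering had to
materialize casts before calling into the GPU runtime wrappers:

  %void_ptr = llvm.bitcast %float_ptr : !llvm.ptr<f32> to !llvm.ptr<i8>
  llvm.call @mgpuMemFree(%void_ptr, %stream) : (!llvm.ptr<i8>, !llvm.ptr<i8>) -> ()

With opaque pointers there is only the single !llvm.ptr type, so the value is
passed directly:

  llvm.call @mgpuMemFree(%float_ptr, %stream) : (!llvm.ptr, !llvm.ptr) -> ()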
Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502

---
 mlir/include/mlir/Conversion/Passes.td | 5 +-
 .../include/mlir/Dialect/LLVMIR/LLVMDialect.h | 2 +-
 .../GPUCommon/GPUToLLVMConversion.cpp | 145 +++++-------------
 ...ower-2to4-sparse-to-gpu-runtime-calls.mlir | 2 +-
 .../lower-alloc-to-gpu-runtime-calls.mlir | 2 +-
 ...ower-launch-func-to-gpu-runtime-calls.mlir | 4 +-
 .../lower-memcpy-to-gpu-runtime-calls.mlir | 2 +-
 .../lower-memset-to-gpu-runtime-calls.mlir | 2 +-
 .../lower-sparse-to-gpu-runtime-calls.mlir | 2 +-
 .../lower-wait-to-gpu-runtime-calls.mlir | 2 +-
 .../Conversion/GPUCommon/transfer_write.mlir | 2 +-
 .../Conversion/GPUCommon/typed-pointers.mlir | 82 ----------
 12 files changed, 50 insertions(+), 202 deletions(-)
 delete mode 100644 mlir/test/Conversion/GPUCommon/typed-pointers.mlir

diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index ba7dc642af2a07..036c9b0039779a 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -449,10 +449,7 @@ def GpuToLLVMConversionPass : Pass<"gpu-to-llvm", "ModuleOp"> {
     Option<"gpuBinaryAnnotation", "gpu-binary-annotation", "std::string",
            /*default=*/"gpu::getDefaultGpuBinaryAnnotation()",
            "Annotation attribute string for GPU binary"
-           >,
-    Option<"useOpaquePointers", "use-opaque-pointers", "bool",
-           /*default=*/"true", "Generate LLVM IR using opaque pointers "
-           "instead of typed pointers">,
+           >
   ];
 
   let dependentDialects = [

diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
index 447e3c9a59e5c0..bbed1ea5cf6220 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
@@ -210,7 +210,7 @@ class GEPIndicesAdaptor {
 /// string (operations inserted at the builder insertion point).
 Value createGlobalString(Location loc, OpBuilder &builder, StringRef name,
                          StringRef value, Linkage linkage,
-                         bool useOpaquePointers);
+                         bool useOpaquePointers = true);
 
 /// LLVM requires some operations to be inside of a Module operation. This
 /// function confirms that the Operation has the desired properties.
diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp index 12bd02050be036..7bac8f5a8f0e03 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp @@ -82,19 +82,12 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern { MLIRContext *context = &this->getTypeConverter()->getContext(); Type llvmVoidType = LLVM::LLVMVoidType::get(context); - LLVM::LLVMPointerType llvmPointerType = - this->getTypeConverter()->getPointerType(IntegerType::get(context, 8)); - Type llvmPointerPointerType = - this->getTypeConverter()->getPointerType(llvmPointerType); + LLVM::LLVMPointerType llvmPointerType = LLVM::LLVMPointerType::get(context); Type llvmInt8Type = IntegerType::get(context, 8); Type llvmInt16Type = IntegerType::get(context, 16); Type llvmInt32Type = IntegerType::get(context, 32); Type llvmInt64Type = IntegerType::get(context, 64); Type llvmFloat32Type = Float32Type::get(context); - Type llvmInt8PointerType = - this->getTypeConverter()->getPointerType(llvmInt8Type); - Type llvmInt64PointerType = - this->getTypeConverter()->getPointerType(llvmInt64Type); Type llvmIntPtrType = IntegerType::get( context, this->getTypeConverter()->getPointerBitwidth(0)); @@ -115,18 +108,18 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern { "mgpuLaunchKernel", llvmVoidType, { - llvmPointerType, /* void* f */ - llvmIntPtrType, /* intptr_t gridXDim */ - llvmIntPtrType, /* intptr_t gridyDim */ - llvmIntPtrType, /* intptr_t gridZDim */ - llvmIntPtrType, /* intptr_t blockXDim */ - llvmIntPtrType, /* intptr_t blockYDim */ - llvmIntPtrType, /* intptr_t blockZDim */ - llvmInt32Type, /* unsigned int sharedMemBytes */ - llvmPointerType, /* void *hstream */ - llvmPointerPointerType, /* void **kernelParams */ - llvmPointerPointerType, /* void **extra */ - llvmInt64Type /* size_t paramsCount */ + llvmPointerType, /* void* f */ + llvmIntPtrType, /* intptr_t gridXDim */ + llvmIntPtrType, /* intptr_t gridyDim */ + llvmIntPtrType, /* intptr_t gridZDim */ + llvmIntPtrType, /* intptr_t blockXDim */ + llvmIntPtrType, /* intptr_t blockYDim */ + llvmIntPtrType, /* intptr_t blockZDim */ + llvmInt32Type, /* unsigned int sharedMemBytes */ + llvmPointerType, /* void *hstream */ + llvmPointerType, /* void **kernelParams */ + llvmPointerType, /* void **extra */ + llvmInt64Type /* size_t paramsCount */ }}; FunctionCallBuilder streamCreateCallBuilder = { "mgpuStreamCreate", llvmPointerType /* void *stream */, {}}; @@ -588,7 +581,6 @@ DECLARE_CONVERT_OP_TO_GPU_RUNTIME_CALL_PATTERN(SetCsrPointersOp) void GpuToLLVMConversionPass::runOnOperation() { LowerToLLVMOptions options(&getContext()); - options.useOpaquePointers = useOpaquePointers; options.useBarePtrCallConv = hostBarePtrCallConv; LLVMTypeConverter converter(&getContext(), options); @@ -835,8 +827,6 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite( // Allocate the underlying buffer and store a pointer to it in the MemRef // descriptor. - Type elementPtrType = this->getElementPtrType(memRefType); - auto nullPtr = rewriter.create(loc, llvmPointerType); Value stream = adaptor.getAsyncDependencies().empty() ? 
nullPtr @@ -848,9 +838,6 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite( Value allocatedPtr = allocCallBuilder.create(loc, rewriter, {sizeBytes, stream, isHostShared}) .getResult(); - if (!getTypeConverter()->useOpaquePointers()) - allocatedPtr = - rewriter.create(loc, elementPtrType, allocatedPtr); // No alignment. Value alignedPtr = allocatedPtr; @@ -880,8 +867,6 @@ LogicalResult ConvertDeallocOpToGpuRuntimeCallPattern::matchAndRewrite( Value pointer = MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) - pointer = rewriter.create(loc, llvmPointerType, pointer); Value stream = adaptor.getAsyncDependencies().front(); deallocCallBuilder.create(loc, rewriter, {pointer, stream}); @@ -1035,24 +1020,21 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( auto structType = LLVM::LLVMStructType::getNewIdentified(context, StringRef(), argumentTypes); auto one = builder.create(loc, llvmInt32Type, 1); - auto structPtr = builder.create( - loc, getTypeConverter()->getPointerType(structType), structType, one, - /*alignment=*/0); + auto structPtr = + builder.create(loc, llvmPointerType, structType, one, + /*alignment=*/0); auto arraySize = builder.create(loc, llvmInt32Type, numArguments); auto arrayPtr = builder.create( - loc, llvmPointerPointerType, llvmPointerType, arraySize, /*alignment=*/0); + loc, llvmPointerType, llvmPointerType, arraySize, /*alignment=*/0); for (const auto &en : llvm::enumerate(arguments)) { - Value fieldPtr = builder.create( - loc, getTypeConverter()->getPointerType(argumentTypes[en.index()]), - structType, structPtr, ArrayRef{0, en.index()}); + Value fieldPtr = + builder.create(loc, llvmPointerType, structType, structPtr, + ArrayRef{0, en.index()}); builder.create(loc, en.value(), fieldPtr); auto elementPtr = builder.create( - loc, llvmPointerPointerType, llvmPointerType, arrayPtr, + loc, llvmPointerType, llvmPointerType, arrayPtr, ArrayRef{en.index()}); - if (!getTypeConverter()->useOpaquePointers()) - fieldPtr = - builder.create(loc, llvmPointerType, fieldPtr); builder.create(loc, fieldPtr, elementPtr); } return arrayPtr; @@ -1079,7 +1061,7 @@ Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateKernelNameConstant( std::string(llvm::formatv("{0}_{1}_kernel_name", moduleName, name)); return LLVM::createGlobalString( loc, builder, globalName, StringRef(kernelName.data(), kernelName.size()), - LLVM::Linkage::Internal, getTypeConverter()->useOpaquePointers()); + LLVM::Linkage::Internal); } // Emits LLVM IR to launch a kernel function. Expects the module that contains @@ -1170,9 +1152,9 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite( SmallString<128> nameBuffer(kernelModule.getName()); nameBuffer.append(kGpuBinaryStorageSuffix); - Value data = LLVM::createGlobalString( - loc, rewriter, nameBuffer.str(), binaryAttr.getValue(), - LLVM::Linkage::Internal, getTypeConverter()->useOpaquePointers()); + Value data = + LLVM::createGlobalString(loc, rewriter, nameBuffer.str(), + binaryAttr.getValue(), LLVM::Linkage::Internal); // Pass the binary size. SPIRV requires binary size. auto gpuBlob = binaryAttr.getValue(); @@ -1205,7 +1187,7 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite( : adaptor.getAsyncDependencies().front(); // Create array of pointers to kernel arguments. 
auto kernelParams = generateParamsArray(launchOp, adaptor, rewriter); - auto nullpointer = rewriter.create(loc, llvmPointerPointerType); + auto nullpointer = rewriter.create(loc, llvmPointerType); Value dynamicSharedMemorySize = launchOp.getDynamicSharedMemorySize() ? launchOp.getDynamicSharedMemorySize() : zero; @@ -1241,14 +1223,10 @@ static Value bitAndAddrspaceCast(Location loc, if (destinationType.getAddressSpace() != sourceTy.getAddressSpace()) sourcePtr = rewriter.create( loc, - typeConverter.getPointerType(sourceTy.getElementType(), - destinationType.getAddressSpace()), + LLVM::LLVMPointerType::get(rewriter.getContext(), + destinationType.getAddressSpace()), sourcePtr); - - if (typeConverter.useOpaquePointers()) - return sourcePtr; - - return rewriter.create(loc, destinationType, sourcePtr); + return sourcePtr; } LogicalResult ConvertMemcpyOpToGpuRuntimeCallPattern::matchAndRewrite( @@ -1366,8 +1344,6 @@ LogicalResult ConvertCreateDnTensorOpToGpuRuntimeCallPattern::matchAndRewrite( auto stream = adaptor.getAsyncDependencies().front(); Value pTensor = MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) - pTensor = rewriter.create(loc, llvmPointerType, pTensor); Type dType = op.getMemref().getType().getElementType(); auto dtp = genConstInt32From(rewriter, loc, getCuSparseDataTypeFrom(dType)); @@ -1388,7 +1364,7 @@ LogicalResult ConvertCreateDnTensorOpToGpuRuntimeCallPattern::matchAndRewrite( auto handleSz = rewriter.create( loc, getIndexType(), rewriter.getIndexAttr(11032)); handle = rewriter.create( - loc, llvmInt8PointerType, llvmInt8Type, handleSz, /*alignment=*/16); + loc, llvmPointerType, llvmInt8Type, handleSz, /*alignment=*/16); handle = rewriter.create(loc, llvmPointerType, handle); createLtDnMatCallBuilder @@ -1457,11 +1433,6 @@ LogicalResult ConvertCreateCooOpToGpuRuntimeCallPattern::matchAndRewrite( MemRefDescriptor(adaptor.getColIdxs()).allocatedPtr(rewriter, loc); Value pValues = MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) { - pRowIdxs = rewriter.create(loc, llvmPointerType, pRowIdxs); - pColIdxs = rewriter.create(loc, llvmPointerType, pColIdxs); - pValues = rewriter.create(loc, llvmPointerType, pValues); - } Type iType = llvm::cast(op.getColIdxs().getType()).getElementType(); Type dType = @@ -1489,10 +1460,6 @@ LogicalResult ConvertCreateCooAoSOpToGpuRuntimeCallPattern::matchAndRewrite( Value pIdxs = MemRefDescriptor(adaptor.getIdxs()).allocatedPtr(rewriter, loc); Value pValues = MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) { - pIdxs = rewriter.create(loc, llvmPointerType, pIdxs); - pValues = rewriter.create(loc, llvmPointerType, pValues); - } Type iType = llvm::cast(op.getIdxs().getType()).getElementType(); Type dType = llvm::cast(op.getValues().getType()).getElementType(); @@ -1522,11 +1489,6 @@ LogicalResult ConvertCreateCsrOpToGpuRuntimeCallPattern::matchAndRewrite( MemRefDescriptor(adaptor.getColIdxs()).allocatedPtr(rewriter, loc); Value pValues = MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) { - pRowPos = rewriter.create(loc, llvmPointerType, pRowPos); - pColIdxs = rewriter.create(loc, llvmPointerType, pColIdxs); - pValues = rewriter.create(loc, llvmPointerType, pValues); - } Type pType = llvm::cast(op.getRowPos().getType()).getElementType(); Type iType = @@ -1556,8 +1518,6 @@ LogicalResult 
ConvertCreate2To4SpMatOpToGpuRuntimeCallPattern::matchAndRewrite( auto stream = adaptor.getAsyncDependencies().front(); Value pMat = MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) - pMat = rewriter.create(loc, llvmPointerType, pMat); Type dType = llvm::cast(op.getMemref().getType()).getElementType(); auto dtp = genConstInt32From(rewriter, loc, getCuSparseDataTypeFrom(dType)); @@ -1566,7 +1526,7 @@ LogicalResult ConvertCreate2To4SpMatOpToGpuRuntimeCallPattern::matchAndRewrite( auto handleSz = rewriter.create( loc, getIndexType(), rewriter.getIndexAttr(44104)); Value handle = rewriter.create( - loc, llvmInt8PointerType, llvmInt8Type, handleSz, /*alignment=*/16); + loc, llvmPointerType, llvmInt8Type, handleSz, /*alignment=*/16); handle = rewriter.create(loc, llvmPointerType, handle); create2To4SpMatCallBuilder @@ -1630,8 +1590,6 @@ LogicalResult ConvertSpMVOpToGpuRuntimeCallPattern::matchAndRewrite( auto stream = adaptor.getAsyncDependencies().front(); Value pBuf = MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) - pBuf = rewriter.create(loc, llvmPointerType, pBuf); spMVCallBuilder.create(loc, rewriter, {modeA, adaptor.getSpmatA(), adaptor.getDnX(), adaptor.getDnY(), computeType, pBuf, stream}); @@ -1658,7 +1616,7 @@ LogicalResult ConvertSpMMBufferSizeOpToGpuRuntimeCallPattern::matchAndRewrite( auto three = rewriter.create(loc, getIndexType(), rewriter.getIndexAttr(3)); auto bufferSize = rewriter.create( - loc, llvmInt64PointerType, llvmInt64Type, three, /*alignment=*/16); + loc, llvmPointerType, llvmPointerType, three, /*alignment=*/16); createCuSparseLtSpMMBufferSizeBuilder .create(loc, rewriter, {bufferSize, modeA, modeB, adaptor.getSpmatA(), @@ -1667,11 +1625,11 @@ LogicalResult ConvertSpMMBufferSizeOpToGpuRuntimeCallPattern::matchAndRewrite( .getResult(); auto bufferSizePtr1 = rewriter.create( - loc, llvmInt64PointerType, llvmInt64PointerType, bufferSize, + loc, llvmPointerType, llvmPointerType, bufferSize, ValueRange{rewriter.create( loc, getIndexType(), rewriter.getIndexAttr(1))}); auto bufferSizePtr2 = rewriter.create( - loc, llvmInt64PointerType, llvmInt64PointerType, bufferSize, + loc, llvmPointerType, llvmPointerType, bufferSize, ValueRange{rewriter.create( loc, getIndexType(), rewriter.getIndexAttr(2))}); auto bufferSize0 = @@ -1737,8 +1695,6 @@ LogicalResult ConvertSpMMOpToGpuRuntimeCallPattern::matchAndRewrite( SmallVector pBufs; for (Value buffer : adaptor.getBuffers()) { Value pBuf = MemRefDescriptor(buffer).allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) - pBuf = rewriter.create(loc, llvmPointerType, pBuf); pBufs.push_back(pBuf); } createCuSparseLtSpMMBuilder.create( @@ -1748,8 +1704,6 @@ LogicalResult ConvertSpMMOpToGpuRuntimeCallPattern::matchAndRewrite( } else { Value pBuf = MemRefDescriptor(adaptor.getBuffers().front()) .allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) - pBuf = rewriter.create(loc, llvmPointerType, pBuf); createSpMMCallBuilder.create(loc, rewriter, {modeA, modeB, adaptor.getSpmatA(), adaptor.getDnmatB(), adaptor.getDnmatC(), @@ -1762,8 +1716,7 @@ LogicalResult ConvertSpMMOpToGpuRuntimeCallPattern::matchAndRewrite( template static void addOpaquePointerConversion(LLVMTypeConverter &converter) { converter.addConversion([&converter](T) -> Type { - return converter.getPointerType( - IntegerType::get(&converter.getContext(), 8)); + return 
LLVM::LLVMPointerType::get(&converter.getContext()); }); } @@ -1781,8 +1734,6 @@ LogicalResult ConvertSDDMMOpToGpuRuntimeCallPattern::matchAndRewrite( auto stream = adaptor.getAsyncDependencies().front(); Value pBuf = MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) - pBuf = rewriter.create(loc, llvmPointerType, pBuf); createSDDMMCallBuilder.create(loc, rewriter, {modeA, modeB, adaptor.getDnmatA(), adaptor.getDnmatB(), adaptor.getSpmatC(), @@ -1837,9 +1788,6 @@ ConvertSpGEMMWorkEstimationOrComputeOpToGpuRuntimeCallPattern::matchAndRewrite( Value pBuf = MemRefDescriptor(adaptor.getBuffer()).allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) - pBuf = rewriter.create(loc, llvmPointerType, pBuf); - Value bufferSizeNew; if (adaptor.getKind() == @@ -1896,18 +1844,18 @@ LogicalResult ConvertSpMatGetSizeOpToGpuRuntimeCallPattern::matchAndRewrite( auto three = rewriter.create(loc, getIndexType(), rewriter.getIndexAttr(3)); auto buffer = rewriter.create( - loc, llvmInt64PointerType, llvmInt64Type, three, /*alignment=*/16); + loc, llvmPointerType, llvmInt64Type, three, /*alignment=*/16); auto rowsPtr = rewriter.create( - loc, llvmInt64PointerType, llvmInt64PointerType, buffer, + loc, llvmPointerType, llvmPointerType, buffer, ValueRange{rewriter.create(loc, getIndexType(), rewriter.getIndexAttr(0))}); auto colsPtr = rewriter.create( - loc, llvmInt64PointerType, llvmInt64PointerType, buffer, + loc, llvmPointerType, llvmPointerType, buffer, ValueRange{rewriter.create(loc, getIndexType(), rewriter.getIndexAttr(1))}); auto nnzsPtr = rewriter.create( - loc, llvmInt64PointerType, llvmInt64PointerType, buffer, + loc, llvmPointerType, llvmPointerType, buffer, ValueRange{rewriter.create(loc, getIndexType(), rewriter.getIndexAttr(2))}); createSpMatGetSizeBuilder.create( @@ -1934,11 +1882,6 @@ LogicalResult ConvertSetCsrPointersOpToGpuRuntimeCallPattern::matchAndRewrite( MemRefDescriptor(adaptor.getCoordinates()).allocatedPtr(rewriter, loc); Value pVal = MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) { - pPos = rewriter.create(loc, llvmPointerType, pPos); - pCrd = rewriter.create(loc, llvmPointerType, pCrd); - pVal = rewriter.create(loc, llvmPointerType, pVal); - } createSetCsrPointersBuilder.create( loc, rewriter, {adaptor.getSpmat(), pPos, pCrd, pVal, stream}); rewriter.replaceOp(op, {stream}); @@ -1959,11 +1902,6 @@ LogicalResult ConvertCreateCscOpToGpuRuntimeCallPattern::matchAndRewrite( MemRefDescriptor(adaptor.getRowIdxs()).allocatedPtr(rewriter, loc); Value pValues = MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) { - pColPos = rewriter.create(loc, llvmPointerType, pColPos); - pRowIdxs = rewriter.create(loc, llvmPointerType, pRowIdxs); - pValues = rewriter.create(loc, llvmPointerType, pValues); - } Type pType = llvm::cast(op.getColPos().getType()).getElementType(); Type iType = @@ -1997,11 +1935,6 @@ LogicalResult ConvertCreateBsrOpToGpuRuntimeCallPattern::matchAndRewrite( MemRefDescriptor(adaptor.getBColIdxs()).allocatedPtr(rewriter, loc); Value pValues = MemRefDescriptor(adaptor.getValues()).allocatedPtr(rewriter, loc); - if (!getTypeConverter()->useOpaquePointers()) { - pRowPos = rewriter.create(loc, llvmPointerType, pRowPos); - pColIdxs = rewriter.create(loc, llvmPointerType, pColIdxs); - pValues = rewriter.create(loc, llvmPointerType, pValues); - } Type pType = 
llvm::cast(op.getBRowPos().getType()).getElementType(); Type iType = diff --git a/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir index 113d49c507e9c8..f448d35992333b 100644 --- a/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir +++ b/mlir/test/Conversion/GPUCommon/lower-2to4-sparse-to-gpu-runtime-calls.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s +// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s module attributes {gpu.container_module} { diff --git a/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir index 70450656b9df64..ae8b7aaac7fd94 100644 --- a/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir +++ b/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s +// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s module attributes {gpu.container_module} { // CHECK-LABEL: llvm.func @main diff --git a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir index 9df110d9b23bac..1b9afcdf50a17f 100644 --- a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir +++ b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=nvvm.cubin use-opaque-pointers=1" -split-input-file | FileCheck %s -// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=rocdl.hsaco use-opaque-pointers=1" -split-input-file | FileCheck %s --check-prefix=ROCDL +// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" -split-input-file | FileCheck %s +// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=rocdl.hsaco" -split-input-file | FileCheck %s --check-prefix=ROCDL module attributes {gpu.container_module} { diff --git a/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir index 5c8e6d11934dbb..3f86b076982795 100644 --- a/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir +++ b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s +// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s module attributes {gpu.container_module} { diff --git a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir index 7e4b1191c5e6c2..aaced31813d574 100644 --- a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir +++ b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s +// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s module attributes {gpu.container_module} { diff --git a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir index f86d929e0e19ac..d4c0a76088356f 100644 --- a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir +++ b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir @@ -1,4 +1,4 @@ -// 
RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s +// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s module attributes {gpu.container_module} { diff --git a/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir index a828c1d58da5f9..d15efe354cfa8b 100644 --- a/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir +++ b/mlir/test/Conversion/GPUCommon/lower-wait-to-gpu-runtime-calls.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s +// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s module attributes {gpu.container_module} { diff --git a/mlir/test/Conversion/GPUCommon/transfer_write.mlir b/mlir/test/Conversion/GPUCommon/transfer_write.mlir index 9c2edf698548ba..cba85915b49e43 100644 --- a/mlir/test/Conversion/GPUCommon/transfer_write.mlir +++ b/mlir/test/Conversion/GPUCommon/transfer_write.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=1' | FileCheck %s +// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s func.func @warp_extract(%arg0: index, %arg1: memref<1024x1024xf32>, %arg2: index, %arg3: vector<1xf32>) { %c0 = arith.constant 0 : index diff --git a/mlir/test/Conversion/GPUCommon/typed-pointers.mlir b/mlir/test/Conversion/GPUCommon/typed-pointers.mlir deleted file mode 100644 index e27162c7dbc190..00000000000000 --- a/mlir/test/Conversion/GPUCommon/typed-pointers.mlir +++ /dev/null @@ -1,82 +0,0 @@ -// RUN: mlir-opt %s --gpu-to-llvm='use-opaque-pointers=0' --split-input-file | FileCheck %s - -module attributes {gpu.container_module} { - // CHECK-LABEL: llvm.func @main - // CHECK-SAME: %[[size:.*]]: i64 - func.func @main(%size : index) { - // CHECK: %[[stream:.*]] = llvm.call @mgpuStreamCreate() - %0 = gpu.wait async - // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}}[%[[size]]] - // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] - // CHECK: %[[isHostShared:.*]] = llvm.mlir.constant - // CHECK: llvm.call @mgpuMemAlloc(%[[size_bytes]], %[[stream]], %[[isHostShared]]) - %1, %2 = gpu.alloc async [%0] (%size) : memref - // CHECK: %[[float_ptr:.*]] = llvm.extractvalue {{.*}}[0] - // CHECK: %[[void_ptr:.*]] = llvm.bitcast %[[float_ptr]] - // CHECK: llvm.call @mgpuMemFree(%[[void_ptr]], %[[stream]]) - %3 = gpu.dealloc async [%2] %1 : memref - // CHECK: llvm.call @mgpuStreamSynchronize(%[[stream]]) - // CHECK: llvm.call @mgpuStreamDestroy(%[[stream]]) - gpu.wait [%3] - return - } - - // CHECK: func @foo - func.func @foo(%dst : memref<7xf32, 1>, %src : memref<7xf32>) { - // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate - %t0 = gpu.wait async - // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint - // CHECK-NOT: llvm.addrspacecast - // CHECK: %[[src:.*]] = llvm.bitcast - // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast - // CHECK: %[[dst:.*]] = llvm.bitcast %[[addr_cast]] - // CHECK: llvm.call @mgpuMemcpy(%[[dst]], %[[src]], %[[size_bytes]], %[[t0]]) - %t1 = gpu.memcpy async [%t0] %dst, %src : memref<7xf32, 1>, memref<7xf32> - // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]]) - // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]]) - gpu.wait [%t1] - return - } -} - -// ----- - -module attributes {gpu.container_module} { - - // CHECK: func @memset_f32 - func.func @memset_f32(%dst : memref<7xf32, 1>, %value : f32) { - // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate - %t0 = gpu.wait async - // CHECK: %[[size_bytes:.*]] = llvm.mlir.constant - // CHECK: %[[value:.*]] = llvm.bitcast - // CHECK: %[[addr_cast:.*]] = 
llvm.addrspacecast
- // CHECK: %[[dst:.*]] = llvm.bitcast %[[addr_cast]]
- // CHECK: llvm.call @mgpuMemset32(%[[dst]], %[[value]], %[[size_bytes]], %[[t0]])
- %t1 = gpu.memset async [%t0] %dst, %value : memref<7xf32, 1>, f32
- // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]])
- // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]])
- gpu.wait [%t1]
- return
- }
-}
-
-// -----
-
-module attributes {gpu.container_module} {
-
- // CHECK: func @memset_f16
- func.func @memset_f16(%dst : memref<7xf16, 1>, %value : f16) {
- // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate
- %t0 = gpu.wait async
- // CHECK: %[[size_bytes:.*]] = llvm.mlir.constant
- // CHECK: %[[value:.*]] = llvm.bitcast
- // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast
- // CHECK: %[[dst:.*]] = llvm.bitcast %[[addr_cast]]
- // CHECK: llvm.call @mgpuMemset16(%[[dst]], %[[value]], %[[size_bytes]], %[[t0]])
- %t1 = gpu.memset async [%t0] %dst, %value : memref<7xf16, 1>, f16
- // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]])
- // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]])
- gpu.wait [%t1]
- return
- }
-}

From 6086c272a3a59eb0b6b79dcbe00486bf4461856a Mon Sep 17 00:00:00 2001
From: Matthias Springer
Date: Tue, 31 Oct 2023 17:26:56 +0900
Subject: [PATCH 131/144] [mlir][memref] Fix out-of-bounds crash when reifying result dims (#70774)

Do not crash when the input IR is invalid, i.e., when the index of the
dimension operand of a `tensor.dim`/`memref.dim` is out-of-bounds. This
fixes #70180.

---
 .../ResolveShapedTypeResultDims.cpp | 3 +++
 mlir/test/Dialect/MemRef/resolve-dim-ops.mlir | 27 +++++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 mlir/test/Dialect/MemRef/resolve-dim-ops.mlir

diff --git a/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp b/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp
index f18ae2cc9b6881..fe2eede375ec15 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp
@@ -94,6 +94,9 @@ struct DimOfReifyRankedShapedTypeOpInterface : public OpRewritePattern {
                                reifiedResultShapes)))
       return failure();
     unsigned resultNumber = dimValue.getResultNumber();
+    // Do not apply pattern if the IR is invalid (dim out of bounds).
+ if (*dimIndex >= reifiedResultShapes[resultNumber].size()) + return rewriter.notifyMatchFailure(dimOp, "dimension is out of bounds"); Value replacement = getValueOrCreateConstantIndexOp( rewriter, dimOp.getLoc(), reifiedResultShapes[resultNumber][*dimIndex]); rewriter.replaceOp(dimOp, replacement); diff --git a/mlir/test/Dialect/MemRef/resolve-dim-ops.mlir b/mlir/test/Dialect/MemRef/resolve-dim-ops.mlir new file mode 100644 index 00000000000000..18e9a9d02e1081 --- /dev/null +++ b/mlir/test/Dialect/MemRef/resolve-dim-ops.mlir @@ -0,0 +1,27 @@ +// RUN: mlir-opt --resolve-ranked-shaped-type-result-dims --split-input-file %s | FileCheck %s + +// CHECK-LABEL: func @dim_out_of_bounds( +// CHECK-NEXT: arith.constant +// CHECK-NEXT: memref.dim +// CHECK-NEXT: return +func.func @dim_out_of_bounds(%m : memref<7x8xf32>) -> index { + %idx = arith.constant 7 : index + %0 = memref.dim %m, %idx : memref<7x8xf32> + return %0 : index +} + +// ----- + +// CHECK-LABEL: func @dim_out_of_bounds_2( +// CHECK-NEXT: arith.constant +// CHECK-NEXT: arith.constant +// CHECK-NEXT: bufferization.alloc_tensor +// CHECK-NEXT: tensor.dim +// CHECK-NEXT: return +func.func @dim_out_of_bounds_2(%idx1 : index, %idx2 : index) -> index { + %idx = arith.constant 7 : index + %sz = arith.constant 5 : index + %alloc = bufferization.alloc_tensor(%sz, %sz) : tensor + %0 = tensor.dim %alloc, %idx : tensor + return %0 : index +} From 03c8fbf092a6b6169239d5824df5cced5c020419 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 31 Oct 2023 08:41:56 +0000 Subject: [PATCH 132/144] [RISCV] Add _RM pseudos to pseudos table (#70693) --- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 9 +- .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll | 84 +++++++++---------- 2 files changed, 46 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 445f057d61ee1d..bec67153b6543d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -134,7 +134,8 @@ class PseudoToVInst { ["_M2", ""], ["_M4", ""], ["_M8", ""], - ["_SE", ""] + ["_SE", ""], + ["_RM", ""] ]; string VInst = !foldl(PseudoInst, AffixSubsts, Acc, AffixSubst, !subst(AffixSubst[0], AffixSubst[1], Acc)); @@ -1113,7 +1114,8 @@ class VPseudoUnaryNoMask_FRM : Pseudo<(outs RetClass:$rd), (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$frm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> { + AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1131,7 +1133,8 @@ class VPseudoUnaryMask_FRM.R:$rd), (ins GetVRegNoV0.R:$merge, OpClass:$rs2, VMaskOp:$vm, ixlenimm:$frm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> { + AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll index 9c30d3ac71679b..e62eec573a5704 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -529,16 +529,15 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind { ; ; LMULMAX2-RV32F-LABEL: ctlz_v2i64: ; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v8 -; 
LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v9, 23 ; LMULMAX2-RV32F-NEXT: li a1, 190 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: vwsubu.wv v9, v9, v8 ; LMULMAX2-RV32F-NEXT: li a1, 64 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -550,12 +549,12 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64F: # %bb.0: ; LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v9, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v9, 23 ; LMULMAX2-RV64F-NEXT: li a1, 190 ; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV64F-NEXT: fsrm a1 +; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 ; LMULMAX2-RV64F-NEXT: li a1, 64 ; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -1129,16 +1128,15 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind { ; ; LMULMAX2-RV32F-LABEL: ctlz_v4i64: ; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: li a1, 190 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v12, v8 +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v12, 23 ; LMULMAX2-RV32F-NEXT: vwsubu.wv v10, v10, v8 ; LMULMAX2-RV32F-NEXT: li a1, 64 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma @@ -1150,16 +1148,16 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64F: # %bb.0: ; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV64F-NEXT: li a1, 190 +; LMULMAX2-RV64F-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v11, v8 ; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 -; LMULMAX2-RV64F-NEXT: li a1, 190 -; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 +; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v11, 23 +; LMULMAX2-RV64F-NEXT: vwsubu.vv v12, v10, v8 ; LMULMAX2-RV64F-NEXT: li a1, 64 ; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; LMULMAX2-RV64F-NEXT: vminu.vx v8, v10, a1 +; LMULMAX2-RV64F-NEXT: vminu.vx v8, v12, a1 ; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64F-NEXT: ret ; @@ -1716,16 +1714,15 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { ; ; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v2i64: ; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w 
v9, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v9, 23 ; LMULMAX2-RV32F-NEXT: li a1, 190 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: vwsubu.wv v9, v9, v8 ; LMULMAX2-RV32F-NEXT: vse64.v v9, (a0) ; LMULMAX2-RV32F-NEXT: ret @@ -1734,12 +1731,12 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64F: # %bb.0: ; LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v9, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v9, 23 ; LMULMAX2-RV64F-NEXT: li a1, 190 ; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV64F-NEXT: fsrm a1 +; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 ; LMULMAX2-RV64F-NEXT: vse64.v v10, (a0) ; LMULMAX2-RV64F-NEXT: ret @@ -2286,16 +2283,15 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { ; ; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v4i64: ; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: li a1, 190 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v12, v8 +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v12, 23 ; LMULMAX2-RV32F-NEXT: vwsubu.wv v10, v10, v8 ; LMULMAX2-RV32F-NEXT: vse64.v v10, (a0) ; LMULMAX2-RV32F-NEXT: ret @@ -2304,14 +2300,14 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64F: # %bb.0: ; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV64F-NEXT: li a1, 190 +; LMULMAX2-RV64F-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v11, v8 ; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 -; LMULMAX2-RV64F-NEXT: li a1, 190 -; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 -; LMULMAX2-RV64F-NEXT: vse64.v v10, (a0) +; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v11, 23 +; LMULMAX2-RV64F-NEXT: vwsubu.vv v12, v10, v8 +; LMULMAX2-RV64F-NEXT: vse64.v v12, (a0) ; LMULMAX2-RV64F-NEXT: ret ; ; LMULMAX2-RV32D-LABEL: ctlz_zero_undef_v4i64: From 03934e70ef1cf86a6ebd1a56e0b5b46f4fec8e13 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 31 Oct 2023 09:51:06 +0100 Subject: [PATCH 133/144] [SystemZ] Enable AtomicExpand pass (#70398) The upcoming OpenMP support for SystemZ requires handling of IR insns like `atomicrmw fadd`. Normally atomic float operations are expanded by Clang and such insns do not occur, but OpenMP generates them directly. 
Other architectures handle this using the AtomicExpand pass, which SystemZ
has not needed so far. Enable it.

Currently AtomicExpand treats atomic loads and stores of floats
pessimistically: it casts them to integers, which SystemZ does not need,
since the floating point load and store instructions are already atomic.
However, the way Clang currently expands them is pessimistic as well, so
this change does not make things worse. Optimizing operations on atomic
floats can be a separate change in the future.

This change does not create any differences in the Linux kernel build.
---
 .../Target/SystemZ/SystemZISelLowering.cpp | 9 ++++
 llvm/lib/Target/SystemZ/SystemZISelLowering.h | 2 +
 .../Target/SystemZ/SystemZTargetMachine.cpp | 2 +
 llvm/test/CodeGen/SystemZ/atomic-load-06.ll | 13 ++++++
 llvm/test/CodeGen/SystemZ/atomic-load-07.ll | 11 +++++
 llvm/test/CodeGen/SystemZ/atomic-load-08.ll | 20 +++++++++
 llvm/test/CodeGen/SystemZ/atomic-store-06.ll | 13 ++++++
 llvm/test/CodeGen/SystemZ/atomic-store-07.ll | 11 +++++
 llvm/test/CodeGen/SystemZ/atomic-store-08.ll | 20 +++++++++
 .../test/CodeGen/SystemZ/atomicrmw-fadd-01.ll | 22 ++++++++++
 .../test/CodeGen/SystemZ/atomicrmw-fadd-02.ll | 19 +++++++++
 .../test/CodeGen/SystemZ/atomicrmw-fadd-03.ll | 30 +++++++++++++
 .../test/CodeGen/SystemZ/atomicrmw-fmax-01.ll | 27 ++++++++++++
 .../test/CodeGen/SystemZ/atomicrmw-fmax-02.ll | 24 +++++++++++
 .../test/CodeGen/SystemZ/atomicrmw-fmax-03.ll | 42 +++++++++++++++++++
 .../test/CodeGen/SystemZ/atomicrmw-fmin-01.ll | 27 ++++++++++++
 .../test/CodeGen/SystemZ/atomicrmw-fmin-02.ll | 24 +++++++++++
 .../test/CodeGen/SystemZ/atomicrmw-fmin-03.ll | 42 +++++++++++++++++++
 .../test/CodeGen/SystemZ/atomicrmw-fsub-01.ll | 22 ++++++++++
 .../test/CodeGen/SystemZ/atomicrmw-fsub-02.ll | 19 +++++++++
 .../test/CodeGen/SystemZ/atomicrmw-fsub-03.ll | 30 +++++++++++++
 .../CodeGen/SystemZ/atomicrmw-udec_wrap.ll | 28 +++++++++++++
 .../CodeGen/SystemZ/atomicrmw-uinc_wrap.ll | 22 ++++++++++
 .../test/CodeGen/SystemZ/atomicrmw-xchg-05.ll | 17 ++++++++
 .../test/CodeGen/SystemZ/atomicrmw-xchg-06.ll | 16 +++++++
 .../test/CodeGen/SystemZ/atomicrmw-xchg-07.ll | 24 +++++++++++
 26 files changed, 536 insertions(+)
 create mode 100644 llvm/test/CodeGen/SystemZ/atomic-load-06.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomic-load-07.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomic-load-08.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomic-store-06.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomic-store-07.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomic-store-08.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-fadd-01.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-fadd-02.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-fadd-03.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-fmax-01.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-fmax-02.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-fmin-01.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-fmin-02.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-fsub-01.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-fsub-02.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-fsub-03.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-udec_wrap.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-uinc_wrap.ll
 create mode 100644
llvm/test/CodeGen/SystemZ/atomicrmw-xchg-05.ll create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-xchg-06.ll create mode 100644 llvm/test/CodeGen/SystemZ/atomicrmw-xchg-07.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3db777f904df0a..e6ea4205623d31 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -872,6 +872,15 @@ bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const return false; } +TargetLowering::AtomicExpansionKind +SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { + return (RMW->isFloatingPointOperation() || + RMW->getOperation() == AtomicRMWInst::UIncWrap || + RMW->getOperation() == AtomicRMWInst::UDecWrap) + ? AtomicExpansionKind::CmpXChg + : AtomicExpansionKind::None; +} + bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { // We can use CGFI or CLGFI. return isInt<32>(Imm) || isUInt<32>(Imm); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 0d4b4873e9d73e..fd951b935702aa 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -452,6 +452,8 @@ class SystemZTargetLowering : public TargetLowering { return VT != MVT::f64; } bool hasInlineStackProbe(const MachineFunction &MF) const override; + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override; bool isLegalICmpImmediate(int64_t Imm) const override; bool isLegalAddImmediate(int64_t Imm) const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index 8235446da65c45..186494ad2ac614 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -226,6 +226,8 @@ void SystemZPassConfig::addIRPasses() { addPass(createLoopDataPrefetchPass()); } + addPass(createAtomicExpandPass()); + TargetPassConfig::addIRPasses(); } diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-06.ll b/llvm/test/CodeGen/SystemZ/atomic-load-06.ll new file mode 100644 index 00000000000000..c9c5504520345c --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomic-load-06.ll @@ -0,0 +1,13 @@ +; Test float atomic loads. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define float @f1(ptr %src) { +; CHECK-LABEL: f1: +; CHECK: lgf [[R:%r[0-9]+]], 0(%r2) +; CHECK: sllg [[R]], [[R]], 32 +; CHECK: ldgr %f0, [[R]] +; CHECK: br %r14 + %val = load atomic float, ptr %src seq_cst, align 4 + ret float %val +} diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-07.ll b/llvm/test/CodeGen/SystemZ/atomic-load-07.ll new file mode 100644 index 00000000000000..d183cb6af3d20d --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomic-load-07.ll @@ -0,0 +1,11 @@ +; Test double atomic loads. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define double @f1(ptr %src) { +; CHECK-LABEL: f1: +; CHECK: ld %f0, 0(%r2) +; CHECK: br %r14 + %val = load atomic double, ptr %src seq_cst, align 8 + ret double %val +} diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-08.ll b/llvm/test/CodeGen/SystemZ/atomic-load-08.ll new file mode 100644 index 00000000000000..069d2168e19af7 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomic-load-08.ll @@ -0,0 +1,20 @@ +; Test long double atomic loads. Expect a libcall. 
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @f1(ptr %ret, ptr %src) {
+; CHECK-LABEL: f1:
+; CHECK: lgr [[RET:%r[0-9]+]], %r2
+; CHECK: la %r4, 160(%r15)
+; CHECK: lghi %r2, 16
+; CHECK: lhi %r5, 5
+; CHECK: brasl %r14, __atomic_load@PLT
+; CHECK: ld [[FL:%f[0-9]+]], 160(%r15)
+; CHECK: ld [[FH:%f[0-9]+]], 168(%r15)
+; CHECK: std [[FL]], 0([[RET]])
+; CHECK: std [[FH]], 8([[RET]])
+; CHECK: br %r14
+  %val = load atomic fp128, ptr %src seq_cst, align 8
+  store fp128 %val, ptr %ret, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/atomic-store-06.ll b/llvm/test/CodeGen/SystemZ/atomic-store-06.ll
new file mode 100644
index 00000000000000..fd39793faefc8e
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/atomic-store-06.ll
@@ -0,0 +1,13 @@
+; Test float atomic stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @f1(ptr %src, float %val) {
+; CHECK-LABEL: f1:
+; CHECK: lgdr [[R:%r[0-9]+]], %f0
+; CHECK: srlg [[R]], [[R]], 32
+; CHECK: st [[R]], 0(%r2)
+; CHECK: br %r14
+  store atomic float %val, ptr %src seq_cst, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/atomic-store-07.ll b/llvm/test/CodeGen/SystemZ/atomic-store-07.ll
new file mode 100644
index 00000000000000..c904b738f2c576
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/atomic-store-07.ll
@@ -0,0 +1,11 @@
+; Test double atomic stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @f1(ptr %dst, double %val) {
+; CHECK-LABEL: f1:
+; CHECK: std %f0, 0(%r2)
+; CHECK: br %r14
+  store atomic double %val, ptr %dst seq_cst, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/atomic-store-08.ll b/llvm/test/CodeGen/SystemZ/atomic-store-08.ll
new file mode 100644
index 00000000000000..b33b283e8dbd76
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/atomic-store-08.ll
@@ -0,0 +1,20 @@
+; Test long double atomic stores. Expect a libcall.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @f1(ptr %dst, ptr %src) {
+; CHECK-LABEL: f1:
+; CHECK: ld [[FL:%f[0-9]+]], 0(%r3)
+; CHECK: ld [[FH:%f[0-9]+]], 8(%r3)
+; CHECK: lgr %r3, %r2
+; CHECK: std [[FL]], 160(%r15)
+; CHECK: std [[FH]], 168(%r15)
+; CHECK: la %r4, 160(%r15)
+; CHECK: lghi %r2, 16
+; CHECK: lhi %r5, 5
+; CHECK: brasl %r14, __atomic_store@PLT
+; CHECK: br %r14
+  %val = load fp128, ptr %src, align 8
+  store atomic fp128 %val, ptr %dst seq_cst, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-01.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-01.ll
new file mode 100644
index 00000000000000..1bfa055781c988
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-01.ll
@@ -0,0 +1,22 @@
+; Test atomic float addition. Expect a compare-and-swap loop.
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define float @f1(ptr %src, float %b) { +; CHECK-LABEL: f1: +; CHECK: le [[F:%f[0-9]+]], 0(%r2) +; CHECK: [[L:\.L.+]]: +; CHECK: lgdr [[RI:%r[0-9]+]], [[F]] +; CHECK: aebr [[F]], %f0 +; CHECK: lgdr [[RO:%r[0-9]+]], [[F]] +; CHECK: srlg [[RO]], [[RO]], 32 +; CHECK: srlg [[RI]], [[RI]], 32 +; CHECK: cs [[RI]], [[RO]], 0(%r2) +; CHECK: sllg [[RI]], [[RI]], 32 +; CHECK: ldgr [[F]], [[RI]] +; CHECK: jl [[L]] +; CHECK: ler %f0, [[F]] +; CHECK: br %r14 + %res = atomicrmw fadd ptr %src, float %b seq_cst + ret float %res +} diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-02.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-02.ll new file mode 100644 index 00000000000000..7f9ee3cbec22e6 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-02.ll @@ -0,0 +1,19 @@ +; Test atomic double addition. Expect a compare-and-swap loop. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define double @f1(ptr %src, double %b) { +; CHECK-LABEL: f1: +; CHECK: ld [[F:%f[0-9]+]], 0(%r2) +; CHECK: [[L:\.L.+]]: +; CHECK: lgdr [[RI:%r[0-9]+]], [[F]] +; CHECK: adbr [[F]], %f0 +; CHECK: lgdr [[RO:%r[0-9]+]], [[F]] +; CHECK: csg [[RI]], [[RO]], 0(%r2) +; CHECK: ldgr [[F]], [[RI]] +; CHECK: jl [[L]] +; CHECK: ldr %f0, [[F]] +; CHECK: br %r14 + %res = atomicrmw fadd ptr %src, double %b seq_cst + ret double %res +} diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-03.ll new file mode 100644 index 00000000000000..729fcbc4ac1e75 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fadd-03.ll @@ -0,0 +1,30 @@ +; Test atomic long double addition. Expect a compare-and-swap loop. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @f1(ptr %ret, ptr %src, ptr %b) { +; CHECK-LABEL: f1: +; CHECK: [[FBL:%f[0-9]+]], 0(%r4) +; CHECK: [[FBH:%f[0-9]+]], 8(%r4) +; CHECK: [[FSL:%f[0-9]+]], 0(%r3) +; CHECK: [[FSH:%f[0-9]+]], 8(%r3) +; CHECK: [[LABEL:\.L.+]]: +; CHECK: lgdr [[RISH:%r[0-9]+]], [[FSH]] +; CHECK: lgdr [[RISL:%r[0-9]+]], [[FSL]] +; CHECK: axbr [[FSL]], [[FBL]] +; CHECK: lgdr [[ROSH:%r[0-9]+]], [[FSH]] +; CHECK: lgdr [[ROSL:%r[0-9]+]], [[FSL]] +; CHECK: cdsg [[RISL]], [[ROSL]], 0(%r3) +; CHECK: stg [[RISH]], 168(%r15) +; CHECK: stg [[RISL]], 160(%r15) +; CHECK: ld [[FSL]], 160(%r15) +; CHECK: ld [[FSH]], 168(%r15) +; CHECK: jl [[LABEL]] +; CHECK: std [[FSL]], 0(%r2) +; CHECK: std [[FSH]], 8(%r2) +; CHECK: br %r14 + %val = load fp128, ptr %b + %res = atomicrmw fadd ptr %src, fp128 %val seq_cst + store fp128 %res, ptr %ret + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-01.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-01.ll new file mode 100644 index 00000000000000..80c43137e3a03d --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-01.ll @@ -0,0 +1,27 @@ +; Test atomic float maximum. +; Expect a libcall in a compare-and-swap loop. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define float @f1(ptr %src, float %b) { +; CHECK-LABEL: f1: +; CHECK: lgr [[SRC:%r[0-9]+]], %r2 +; CHECK: le [[FSRC:%f[0-9]+]], 0(%r2) +; CHECK: ler [[FB:%f[0-9]+]], %f0 +; CHECK: [[L:\.L.+]]: +; CHECK: ler %f0, [[FSRC]] +; CHECK: ler %f2, [[FB]] +; CHECK: brasl %r14, fmaxf@PLT +; CHECK: lgdr [[RO:%r[0-9]+]], %f0 +; CHECK: srlg [[RO]], [[RO]], 32 +; CHECK: lgdr [[RI:%r[0-9]+]], [[FSRC]] +; CHECK: srlg [[RI]], [[RI]], 32 +; CHECK: cs [[RI]], [[RO]], 0([[SRC]]) +; CHECK: sllg [[RO]], [[RI]], 32 +; CHECK: ldgr [[FSRC]], [[RO]] +; CHECK: jl [[L]] +; CHECK: ler %f0, [[FSRC]] +; CHECK: br %r14 + %res = atomicrmw fmax ptr %src, float %b seq_cst + ret float %res +} diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-02.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-02.ll new file mode 100644 index 00000000000000..8c245bb049f3f3 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-02.ll @@ -0,0 +1,24 @@ +; Test atomic double maximum. +; Expect a libcall in a compare-and-swap loop. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define double @f1(ptr %src, double %b) { +; CHECK-LABEL: f1: +; CHECK: lgr [[RB:%r[0-9]+]], %r2 +; CHECK: ld [[FB:%f[0-9]+]], 0(%r2) +; CHECK: ldr [[FSRC:%f[0-9]+]], %f0 +; CHECK: [[L:\.L.+]]: +; CHECK: ldr %f0, [[FB]] +; CHECK: ldr %f2, [[FSRC]] +; CHECK: brasl %r14, fmax@PLT +; CHECK: lgdr [[RO:%r[0-9]+]], %f0 +; CHECK: lgdr [[RI:%r[0-9]+]], [[FB]] +; CHECK: csg [[RI]], [[RO]], 0([[RB]]) +; CHECK: ldgr [[FB]], [[RI]] +; CHECK: jl [[L]] +; CHECK: ldr %f0, [[FB]] +; CHECK: br %r14 + %res = atomicrmw fmax ptr %src, double %b seq_cst + ret double %res +} diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll new file mode 100644 index 00000000000000..3c8ea19f86f860 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll @@ -0,0 +1,42 @@ +; Test atomic long double maximum. +; Expect a libcall in a compare-and-swap loop. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @f1(ptr %ret, ptr %src, ptr %b) { +; CHECK-LABEL: f1: +; CHECK: lgr [[SRC:%r[0-9]+]], %r3 +; CHECK: ld [[FBL:%f[0-9]+]], 0(%r4) +; CHECK: ld [[FBH:%f[0-9]+]], 8(%r4) +; CHECK: ld [[FSL:%f[0-9]+]], 0(%r3) +; CHECK: ld [[FSH:%f[0-9]+]], 8(%r3) +; CHECK: lgr [[RET:%r[0-9]+]], %r2 +; CHECK: [[L:\.L.+]]: +; CHECK: std [[FBL]], 160(%r15) +; CHECK: std [[FBH]], 168(%r15) +; CHECK: la %r2, 192(%r15) +; CHECK: la %r3, 176(%r15) +; CHECK: la %r4, 160(%r15) +; CHECK: std [[FSL]], 176(%r15) +; CHECK: std [[FSH]], 184(%r15) +; CHECK: brasl %r14, fmaxl@PLT +; CHECK: ld [[FL:%f[0-9]+]], 192(%r15) +; CHECK: ld [[FH:%f[0-9]+]], 200(%r15) +; CHECK: lgdr [[RH:%r[0-9]+]], [[FH]] +; CHECK: lgdr [[RL:%r[0-9]+]], [[FL]] +; CHECK: lgdr [[RSH:%r[0-9]+]], [[FSH]] +; CHECK: lgdr [[RSL:%r[0-9]+]], [[FSL]] +; CHECK: cdsg [[RSL]], [[RL]], 0([[SRC]]) +; CHECK: stg [[RSH]], 216(%r15) +; CHECK: stg [[RSL]], 208(%r15) +; CHECK: ld [[FSL]], 208(%r15) +; CHECK: ld [[FSH]], 216(%r15) +; CHECK: jl [[L]] +; CHECK: std [[FSL]], 0([[RET]]) +; CHECK: std [[FSH]], 8([[RET]]) +; CHECK: br %r14 + %val = load fp128, ptr %b + %res = atomicrmw fmax ptr %src, fp128 %val seq_cst + store fp128 %res, ptr %ret + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-01.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-01.ll new file mode 100644 index 00000000000000..c67b02e688de3b --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-01.ll @@ -0,0 +1,27 @@ +; Test atomic float minimum. +; Expect a libcall in a compare-and-swap loop. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define float @f1(ptr %src, float %b) { +; CHECK-LABEL: f1: +; CHECK: lgr [[SRC:%r[0-9]+]], %r2 +; CHECK: le [[FSRC:%f[0-9]+]], 0(%r2) +; CHECK: ler [[FB:%f[0-9]+]], %f0 +; CHECK: [[L:\.L.+]]: +; CHECK: ler %f0, [[FSRC]] +; CHECK: ler %f2, [[FB]] +; CHECK: brasl %r14, fminf@PLT +; CHECK: lgdr [[RO:%r[0-9]+]], %f0 +; CHECK: srlg [[RO]], [[RO]], 32 +; CHECK: lgdr [[RI:%r[0-9]+]], [[FSRC]] +; CHECK: srlg [[RI]], [[RI]], 32 +; CHECK: cs [[RI]], [[RO]], 0([[SRC]]) +; CHECK: sllg [[RO]], [[RI]], 32 +; CHECK: ldgr [[FSRC]], [[RO]] +; CHECK: jl [[L]] +; CHECK: ler %f0, [[FSRC]] +; CHECK: br %r14 + %res = atomicrmw fmin ptr %src, float %b seq_cst + ret float %res +} diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-02.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-02.ll new file mode 100644 index 00000000000000..6691a8b21d11bb --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-02.ll @@ -0,0 +1,24 @@ +; Test atomic double minimum. +; Expect a libcall in a compare-and-swap loop. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define double @f1(ptr %src, double %b) { +; CHECK-LABEL: f1: +; CHECK: lgr [[SRC:%r[0-9]+]], %r2 +; CHECK: ld [[FSRC:%f[0-9]+]], 0(%r2) +; CHECK: ldr [[FB:%f[0-9]+]], %f0 +; CHECK: [[L:\.L.+]]: +; CHECK: ldr %f0, [[FSRC]] +; CHECK: ldr %f2, [[FB]] +; CHECK: brasl %r14, fmin@PLT +; CHECK: lgdr [[RO:%r[0-9]+]], %f0 +; CHECK: lgdr [[RI:%r[0-9]+]], [[FSRC]] +; CHECK: csg [[RI]], [[RO]], 0([[SRC]]) +; CHECK: ldgr [[FSRC]], [[RI]] +; CHECK: jl [[L]] +; CHECK: ldr %f0, [[FSRC]] +; CHECK: br %r14 + %res = atomicrmw fmin ptr %src, double %b seq_cst + ret double %res +} diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll new file mode 100644 index 00000000000000..dfa2cc021d1667 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll @@ -0,0 +1,42 @@ +; Test atomic long double minimum. +; Expect a libcall in a compare-and-swap loop. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @f1(ptr %ret, ptr %src, ptr %b) { +; CHECK-LABEL: f1: +; CHECK: lgr [[SRC:%r[0-9]+]], %r3 +; CHECK: ld [[FBL:%f[0-9]+]], 0(%r4) +; CHECK: ld [[FBH:%f[0-9]+]], 8(%r4) +; CHECK: ld [[FSL:%f[0-9]+]], 0(%r3) +; CHECK: ld [[FSH:%f[0-9]+]], 8(%r3) +; CHECK: lgr [[RET:%r[0-9]+]], %r2 +; CHECK: [[L:\.L.+]]: +; CHECK: std [[FBL]], 160(%r15) +; CHECK: std [[FBH]], 168(%r15) +; CHECK: la %r2, 192(%r15) +; CHECK: la %r3, 176(%r15) +; CHECK: la %r4, 160(%r15) +; CHECK: std [[FSL]], 176(%r15) +; CHECK: std [[FSH]], 184(%r15) +; CHECK: brasl %r14, fminl@PLT +; CHECK: ld [[FL:%f[0-9]+]], 192(%r15) +; CHECK: ld [[FH:%f[0-9]+]], 200(%r15) +; CHECK: lgdr [[RH:%r[0-9]+]], [[FH]] +; CHECK: lgdr [[RL:%r[0-9]+]], [[FL]] +; CHECK: lgdr [[RSH:%r[0-9]+]], [[FSH]] +; CHECK: lgdr [[RSL:%r[0-9]+]], [[FSL]] +; CHECK: cdsg [[RSL]], [[RL]], 0([[SRC]]) +; CHECK: stg [[RSH]], 216(%r15) +; CHECK: stg [[RSL]], 208(%r15) +; CHECK: ld [[FSL]], 208(%r15) +; CHECK: ld [[FSH]], 216(%r15) +; CHECK: jl [[L]] +; CHECK: std [[FSL]], 0([[RET]]) +; CHECK: std [[FSH]], 8([[RET]]) +; CHECK: br %r14 + %val = load fp128, ptr %b + %res = atomicrmw fmin ptr %src, fp128 %val seq_cst + store fp128 %res, ptr %ret + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fsub-01.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fsub-01.ll new file mode 100644 index 00000000000000..3f4ad31762753f --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fsub-01.ll @@ -0,0 +1,22 @@ +; Test atomic float subtraction. Expect a compare-and-swap loop. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define float @f1(ptr %src, float %b) { +; CHECK-LABEL: f1: +; CHECK: le [[F:%f[0-9]+]], 0(%r2) +; CHECK: [[L:\.L.+]]: +; CHECK: lgdr [[RI:%r[0-9]+]], [[F]] +; CHECK: sebr [[F]], %f0 +; CHECK: lgdr [[RO:%r[0-9]+]], [[F]] +; CHECK: srlg [[RO]], [[RO]], 32 +; CHECK: srlg [[RI]], [[RI]], 32 +; CHECK: cs [[RI]], [[RO]], 0(%r2) +; CHECK: sllg [[RI]], [[RI]], 32 +; CHECK: ldgr [[F]], [[RI]] +; CHECK: jl [[L]] +; CHECK: ler %f0, [[F]] +; CHECK: br %r14 + %res = atomicrmw fsub ptr %src, float %b seq_cst + ret float %res +} diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fsub-02.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fsub-02.ll new file mode 100644 index 00000000000000..69071f9cbe029a --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fsub-02.ll @@ -0,0 +1,19 @@ +; Test atomic double subtraction. Expect a compare-and-swap loop. 
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define double @f1(ptr %src, double %b) {
+; CHECK-LABEL: f1:
+; CHECK: ld [[F:%f[0-9]+]], 0(%r2)
+; CHECK: [[L:\.L.+]]:
+; CHECK: lgdr [[RI:%r[0-9]+]], [[F]]
+; CHECK: sdbr [[F]], %f0
+; CHECK: lgdr [[RO:%r[0-9]+]], [[F]]
+; CHECK: csg [[RI]], [[RO]], 0(%r2)
+; CHECK: ldgr [[F]], [[RI]]
+; CHECK: jl [[L]]
+; CHECK: ldr %f0, [[F]]
+; CHECK: br %r14
+  %res = atomicrmw fsub ptr %src, double %b seq_cst
+  ret double %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fsub-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fsub-03.ll
new file mode 100644
index 00000000000000..aacbc942f1a56b
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fsub-03.ll
@@ -0,0 +1,30 @@
+; Test atomic long double subtraction. Expect a compare-and-swap loop.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @f1(ptr %ret, ptr %src, ptr %b) {
+; CHECK-LABEL: f1:
+; CHECK: [[FBL:%f[0-9]+]], 0(%r4)
+; CHECK: [[FBH:%f[0-9]+]], 8(%r4)
+; CHECK: [[FSL:%f[0-9]+]], 0(%r3)
+; CHECK: [[FSH:%f[0-9]+]], 8(%r3)
+; CHECK: [[LABEL:\.L.+]]:
+; CHECK: lgdr [[RISH:%r[0-9]+]], [[FSH]]
+; CHECK: lgdr [[RISL:%r[0-9]+]], [[FSL]]
+; CHECK: sxbr [[FSL]], [[FBL]]
+; CHECK: lgdr [[ROSH:%r[0-9]+]], [[FSH]]
+; CHECK: lgdr [[ROSL:%r[0-9]+]], [[FSL]]
+; CHECK: cdsg [[RISL]], [[ROSL]], 0(%r3)
+; CHECK: stg [[RISH]], 168(%r15)
+; CHECK: stg [[RISL]], 160(%r15)
+; CHECK: ld [[FSL]], 160(%r15)
+; CHECK: ld [[FSH]], 168(%r15)
+; CHECK: jl [[LABEL]]
+; CHECK: std [[FSL]], 0(%r2)
+; CHECK: std [[FSH]], 8(%r2)
+; CHECK: br %r14
+  %val = load fp128, ptr %b
+  %res = atomicrmw fsub ptr %src, fp128 %val seq_cst
+  store fp128 %res, ptr %ret
+  ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-udec_wrap.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-udec_wrap.ll
new file mode 100644
index 00000000000000..d6427fc29f0516
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/atomicrmw-udec_wrap.ll
@@ -0,0 +1,28 @@
+; Test decrementing down to a minimum value. Expect a compare-and-swap loop.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define i64 @f1(ptr %src, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK: lgr [[SRC:%r[0-9]+]], %r2
+; CHECK: lg [[RI:%r[0-9]+]], 0(%r2)
+; CHECK: j [[L2:\.L.+]]
+; CHECK: [[L1:\.L.+]]:
+; CHECK: csg [[RI]], [[RO:%r[0-9]+]], 0([[SRC]])
+; CHECK: je [[L4:\.L.+]]
+; CHECK: [[L2]]:
+; CHECK: lgr [[RO]], [[RI]]
+; CHECK: slgfi [[RO]], 1
+; CHECK: lgr [[RB:%r[0-9]+]], %r3
+; CHECK: clgrjh [[RI]], %r3, [[L3:\.L.+]]
+; CHECK: lgr [[RB]], [[RO]]
+; CHECK: [[L3]]:
+; CHECK: lgr [[RO]], [[RI]]
+; CHECK: slgfi [[RO]], 1
+; CHECK: lgr [[RO]], %r3
+; CHECK: jle [[L1]]
+; CHECK: [[L4]]:
+; CHECK: br %r14
+  %res = atomicrmw udec_wrap ptr %src, i64 %b seq_cst
+  ret i64 %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-uinc_wrap.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-uinc_wrap.ll
new file mode 100644
index 00000000000000..cf90d756535959
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/atomicrmw-uinc_wrap.ll
@@ -0,0 +1,22 @@
+; Test incrementing up to a maximum value. Expect a compare-and-swap loop.
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define i64 @f1(ptr %src, i64 %b) { +; CHECK-LABEL: f1: +; CHECK: lgr [[SRC:%r[0-9]+]], %r2 +; CHECK: lg [[RI:%r[0-9]+]], 0(%r2) +; CHECK: j [[L2:\.L.+]] +; CHECK: [[L1:\.L.+]]: +; CHECK: csg [[RI]], [[RO:%r[0-9]+]], 0([[SRC]]) +; CHECK: je [[L3:\.L.+]] +; CHECK: [[L2]]: +; CHECK: lghi [[RO]], 0 +; CHECK: clgrjhe [[RI]], %r3, [[L1]] +; CHECK: la [[RO]], 1([[RI]]) +; CHECK: j [[L1]] +; CHECK: [[L3]]: +; CHECK: br %r14 + %res = atomicrmw uinc_wrap ptr %src, i64 %b seq_cst + ret i64 %res +} diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-05.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-05.ll new file mode 100644 index 00000000000000..d86e476115c386 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-05.ll @@ -0,0 +1,17 @@ +; Test float atomic exchange. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define float @f1(ptr %src, float %b) { +; CHECK-LABEL: f1: +; CHECK: l [[RI:%r[0-9]+]], 0(%r2) +; CHECK: lgdr [[RO:%r[0-9]+]], %f0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: cs [[RI]], [[RO]], 0(%r2) +; CHECK: jl [[LABEL]] +; CHECK: sllg [[RI]], [[RI]], 32 +; CHECK: ldgr %f0, [[RI]] +; CHECK: br %r14 + %res = atomicrmw xchg ptr %src, float %b seq_cst + ret float %res +} diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-06.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-06.ll new file mode 100644 index 00000000000000..9b78e783035dd8 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-06.ll @@ -0,0 +1,16 @@ +; Test double atomic exchange. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define double @f1(ptr %src, double %b) { +; CHECK-LABEL: f1: +; CHECK: lg [[RI:%r[0-9]+]], 0(%r2) +; CHECK: lgdr [[RO:%r[0-9]+]], %f0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: csg [[RI]], [[RO]], 0(%r2) +; CHECK: jl [[LABEL]] +; CHECK: ldgr %f0, [[RI]] +; CHECK: br %r14 + %res = atomicrmw xchg ptr %src, double %b seq_cst + ret double %res +} diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-07.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-07.ll new file mode 100644 index 00000000000000..80cc85158e45e6 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-07.ll @@ -0,0 +1,24 @@ +; Test long double atomic exchange. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @f1(ptr %ret, ptr %src, ptr %b) { +; CHECK-LABEL: f1: +; CHECK: lg [[RH:%r[0-9]+]], 8(%r4) +; CHECK: lgr [[RET:%r[0-9]+]], %r2 +; CHECK: lg [[RL:%r[0-9]+]], 0(%r4) +; CHECK: stg [[RH]], 168(%r15) +; CHECK: la %r2, 176(%r15) +; CHECK: la %r4, 160(%r15) +; CHECK: stg [[RL]], 160(%r15) +; CHECK: brasl %r14, __sync_lock_test_and_set_16@PLT +; CHECK: lg [[RH2:%r[0-9]+]], 184(%r15) +; CHECK: lg [[RL2:%r[0-9]+]], 176(%r15) +; CHECK: stg [[RH]], 8([[RET]]) +; CHECK: stg [[RL]], 0([[RET]]) +; CHECK: br %r14 + %val = load fp128, ptr %b, align 8 + %res = atomicrmw xchg ptr %src, fp128 %val seq_cst + store fp128 %res, ptr %ret, align 8 + ret void +} From b26e6a8eb57da6bc0f6d968a7ff87be0f3862683 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Tue, 31 Oct 2023 09:57:10 +0100 Subject: [PATCH 134/144] [GlobalISel] Add `GITypeOf` special type (#66079) Allows creating a register/immediate that uses the same type as a matched operand. 
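For example, the mul_by_neg_one combine updated in this patch (quoted
verbatim from the Combine.td change below) uses it to materialize a zero
with the same type as the matched operand $x:

    def mul_by_neg_one: GICombineRule <
      (defs root:$dst),
      (match (G_MUL $dst, $x, -1)),
      (apply (G_SUB $dst, (GITypeOf<"$x"> 0), $x))
    >;
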
--- llvm/docs/GlobalISel/MIRPatterns.rst | 42 +++ .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 - .../CodeGen/GlobalISel/GIMatchTableExecutor.h | 10 + .../GlobalISel/GIMatchTableExecutorImpl.h | 32 +- .../include/llvm/Target/GlobalISel/Combine.td | 25 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 12 - .../match-table-typeof.td | 49 ++++ .../operand-types.td | 28 +- .../pattern-parsing.td | 25 +- .../typeof-errors.td | 72 +++++ .../TableGen/GlobalISelCombinerEmitter.cpp | 277 +++++++++++++++--- llvm/utils/TableGen/GlobalISelMatchTable.cpp | 36 ++- llvm/utils/TableGen/GlobalISelMatchTable.h | 89 +++++- 13 files changed, 621 insertions(+), 79 deletions(-) create mode 100644 llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-typeof.td create mode 100644 llvm/test/TableGen/GlobalISelCombinerEmitter/typeof-errors.td diff --git a/llvm/docs/GlobalISel/MIRPatterns.rst b/llvm/docs/GlobalISel/MIRPatterns.rst index fa70311f48572d..a3883b14b3e0bd 100644 --- a/llvm/docs/GlobalISel/MIRPatterns.rst +++ b/llvm/docs/GlobalISel/MIRPatterns.rst @@ -101,6 +101,48 @@ pattern, you can try naming your patterns to see exactly where the issue is. // using $x again here copies operand 1 from G_AND into the new inst. (apply (COPY $root, $x)) +Types +----- + +ValueType +~~~~~~~~~ + +Subclasses of ``ValueType`` are valid types, e.g. ``i32``. + +GITypeOf +~~~~~~~~ + +``GITypeOf<"$x">`` is a ``GISpecialType`` that allows for the creation of a +register or immediate with the same type as another (register) operand. + +Operand: + +* An operand name as a string, prefixed by ``$``. + +Semantics: + +* Can only appear in an 'apply' pattern. +* The operand name used must appear in the 'match' pattern of the + same ``GICombineRule``. + +.. code-block:: text + :caption: Example: Immediate + + def mul_by_neg_one: GICombineRule < + (defs root:$root), + (match (G_MUL $dst, $x, -1)), + (apply (G_SUB $dst, (GITypeOf<"$x"> 0), $x)) + >; + +.. code-block:: text + :caption: Example: Temp Reg + + def Test0 : GICombineRule< + (defs root:$dst), + (match (G_FMUL $dst, $src, -1)), + (apply (G_FSUB $dst, $src, $tmp), + (G_FNEG GITypeOf<"$dst">:$tmp, $src))>; + Builtin Operations ------------------ diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 65299e852574bd..ba72a3b71ffd70 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -405,9 +405,6 @@ class CombinerHelper { void applyCombineTruncOfShift(MachineInstr &MI, std::pair &MatchInfo); - /// Transform G_MUL(x, -1) to G_SUB(0, x) - void applyCombineMulByNegativeOne(MachineInstr &MI); - /// Return true if any explicit use operand on \p MI is defined by a /// G_IMPLICIT_DEF. bool matchAnyExplicitUseIsUndef(MachineInstr &MI); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h index 209f80c6d6d287..6fcd9d09e1863c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h @@ -275,6 +275,12 @@ enum { /// - StoreIdx - Store location in RecordedOperands. GIM_RecordNamedOperand, + /// Records an operand's register type into the set of temporary types. + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - TempTypeIdx - Temp Type Index, always negative. 
+ GIM_RecordRegType, + /// Fail the current try-block, or completely fail to match if there is no /// current try-block. GIM_Reject, @@ -522,6 +528,10 @@ class GIMatchTableExecutor { /// list. Currently such predicates don't have more then 3 arguments. std::array RecordedOperands; + /// Types extracted from an instruction's operand. + /// Whenever a type index is negative, we look here instead. + SmallVector RecordedTypes; + MatcherState(unsigned MaxRenderers); }; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h index fb03d5ec0bc89a..32e2f21d775f30 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h @@ -92,6 +92,14 @@ bool GIMatchTableExecutor::executeMatchTable( return true; }; + // If the index is >= 0, it's an index in the type objects generated by + // TableGen. If the index is <0, it's an index in the recorded types object. + auto getTypeFromIdx = [&](int64_t Idx) -> LLT { + if (Idx >= 0) + return ExecInfo.TypeObjects[Idx]; + return State.RecordedTypes[1 - Idx]; + }; + while (true) { assert(CurrentIdx != ~0u && "Invalid MatchTable index"); int64_t MatcherOpcode = MatchTable[CurrentIdx++]; @@ -627,8 +635,7 @@ bool GIMatchTableExecutor::executeMatchTable( << "), TypeID=" << TypeID << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); - if (!MO.isReg() || - MRI.getType(MO.getReg()) != ExecInfo.TypeObjects[TypeID]) { + if (!MO.isReg() || MRI.getType(MO.getReg()) != getTypeFromIdx(TypeID)) { if (handleReject() == RejectAndGiveUp) return false; } @@ -679,6 +686,25 @@ bool GIMatchTableExecutor::executeMatchTable( State.RecordedOperands[StoreIdx] = &State.MIs[InsnID]->getOperand(OpIdx); break; } + case GIM_RecordRegType: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t TypeIdx = MatchTable[CurrentIdx++]; + + DEBUG_WITH_TYPE(TgtExecutor::getName(), + dbgs() << CurrentIdx << ": GIM_RecordRegType(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), TypeIdx=" << TypeIdx << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(TypeIdx <= 0 && "Temp types always have negative indexes!"); + // Indexes start at -1. 
+ TypeIdx = 1 - TypeIdx; + const auto &Op = State.MIs[InsnID]->getOperand(OpIdx); + if (State.RecordedTypes.size() <= (uint64_t)TypeIdx) + State.RecordedTypes.resize(TypeIdx + 1, LLT()); + State.RecordedTypes[TypeIdx] = MRI.getType(Op.getReg()); + break; + } case GIM_CheckRegBankForClass: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -1275,7 +1301,7 @@ bool GIMatchTableExecutor::executeMatchTable( int64_t TypeID = MatchTable[CurrentIdx++]; State.TempRegisters[TempRegID] = - MRI.createGenericVirtualRegister(ExecInfo.TypeObjects[TypeID]); + MRI.createGenericVirtualRegister(getTypeFromIdx(TypeID)); DEBUG_WITH_TYPE(TgtExecutor::getName(), dbgs() << CurrentIdx << ": TempRegs[" << TempRegID << "] = GIR_MakeTempReg(" << TypeID << ")\n"); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index bb8223ba3486a8..63c485a5a6c607 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -110,6 +110,24 @@ class GICombinePatFrag alts> { list Alternatives = alts; } +//===----------------------------------------------------------------------===// +// Pattern Special Types +//===----------------------------------------------------------------------===// + +class GISpecialType; + +// In an apply pattern, GITypeOf can be used to set the type of a new temporary +// register to match the type of a matched register. +// +// This can only be used on temporary registers defined by the apply pattern. +// +// TODO: Make this work in matchers as well? +// +// FIXME: Syntax is very ugly. +class GITypeOf : GISpecialType { + string OpName = opName; +} + //===----------------------------------------------------------------------===// // Pattern Builtins //===----------------------------------------------------------------------===// @@ -776,10 +794,9 @@ def trunc_shift: GICombineRule < // Transform (mul x, -1) -> (sub 0, x) def mul_by_neg_one: GICombineRule < - (defs root:$root), - (match (wip_match_opcode G_MUL):$root, - [{ return Helper.matchConstantOp(${root}->getOperand(2), -1); }]), - (apply [{ Helper.applyCombineMulByNegativeOne(*${root}); }]) + (defs root:$dst), + (match (G_MUL $dst, $x, -1)), + (apply (G_SUB $dst, (GITypeOf<"$x"> 0), $x)) >; // Fold (xor (and x, y), y) -> (and (not x), y) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 3c2b5f490ccb87..51c268ab77c222 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -2351,18 +2351,6 @@ void CombinerHelper::applyCombineExtOfExt( } } -void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - - Builder.setInstrAndDebugLoc(MI); - Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg, - MI.getFlags()); - MI.eraseFromParent(); -} - bool CombinerHelper::matchCombineTruncOfExt( MachineInstr &MI, std::pair &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-typeof.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-typeof.td new file mode 100644 index 00000000000000..496d86aeef2d10 --- /dev/null +++ 
b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-typeof.td @@ -0,0 +1,49 @@ +// RUN: llvm-tblgen -I %p/../../../include -gen-global-isel-combiner \ +// RUN: -combiners=MyCombiner %s | \ +// RUN: FileCheck %s + +include "llvm/Target/Target.td" +include "llvm/Target/GlobalISel/Combine.td" + +def MyTargetISA : InstrInfo; +def MyTarget : Target { let InstructionSet = MyTargetISA; } + +def Test0 : GICombineRule< + (defs root:$dst), + (match (G_MUL $dst, $src, -1)), + (apply (G_SUB $dst, (GITypeOf<"$src"> 0), $tmp), + (G_CONSTANT GITypeOf<"$dst">:$tmp, (GITypeOf<"$src"> 42)))>; + +// CHECK: const int64_t *GenMyCombiner::getMatchTable() const { +// CHECK-NEXT: constexpr static int64_t MatchTable0[] = { +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 0*/ 57, // Rule ID 0 // +// CHECK-NEXT: GIM_CheckSimplePredicate, GICXXPred_Simple_IsRule0Enabled, +// CHECK-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_MUL, +// CHECK-NEXT: // MIs[0] dst +// CHECK-NEXT: GIM_RecordRegType, /*MI*/0, /*Op*/0, /*TempTypeIdx*/-1, +// CHECK-NEXT: // MIs[0] src +// CHECK-NEXT: GIM_RecordRegType, /*MI*/0, /*Op*/1, /*TempTypeIdx*/-2, +// CHECK-NEXT: // MIs[0] Operand 2 +// CHECK-NEXT: GIM_CheckConstantInt, /*MI*/0, /*Op*/2, -1, +// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/-2, +// CHECK-NEXT: GIR_BuildConstant, /*TempRegID*/1, /*Val*/0, +// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/-1, +// CHECK-NEXT: // Combiner Rule #0: Test0 +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::G_CONSTANT, +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, +// CHECK-NEXT: GIR_AddCImm, /*InsnID*/0, /*Type*/-2, /*Imm*/42, +// CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::G_SUB, +// CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/0, // dst +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/1, /*TempRegFlags*/0, +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/0, +// CHECK-NEXT: GIR_Done, +// CHECK-NEXT: // Label 0: @57 +// CHECK-NEXT: GIM_Reject, +// CHECK-NEXT: }; +// CHECK-NEXT: return MatchTable0; +// CHECK-NEXT: } + +def MyCombiner: GICombiner<"GenMyCombiner", [ + Test0 +]>; diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/operand-types.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/operand-types.td index c871e603e4e05a..4769bed9724012 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/operand-types.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/operand-types.td @@ -79,7 +79,33 @@ def PatFragTest0 : GICombineRule< (match (FooPF $dst)), (apply (COPY $dst, (i32 0)))>; + +// CHECK: (CombineRule name:TypeOfProp id:2 root:x +// CHECK-NEXT: (MatchPats +// CHECK-NEXT: __TypeOfProp_match_0:(CodeGenInstructionPattern G_ZEXT operands:[$x, $y]) +// CHECK-NEXT: ) +// CHECK-NEXT: (ApplyPats +// CHECK-NEXT: __TypeOfProp_apply_0:(CodeGenInstructionPattern G_ANYEXT operands:[$x, GITypeOf<$y>:$tmp]) +// CHECK-NEXT: __TypeOfProp_apply_1:(CodeGenInstructionPattern G_ANYEXT operands:[GITypeOf<$y>:$tmp, $y]) +// CHECK-NEXT: ) +// CHECK-NEXT: (OperandTable MatchPats +// CHECK-NEXT: x -> __TypeOfProp_match_0 +// CHECK-NEXT: y -> +// CHECK-NEXT: ) +// CHECK-NEXT: (OperandTable ApplyPats +// CHECK-NEXT: tmp -> __TypeOfProp_apply_1 +// CHECK-NEXT: x -> __TypeOfProp_apply_0 +// CHECK-NEXT: y -> +// CHECK-NEXT: ) +// CHECK-NEXT: ) +def TypeOfProp : GICombineRule< + (defs root:$x), + (match (G_ZEXT $x, $y)), + (apply (G_ANYEXT $x, 
GITypeOf<"$y">:$tmp), + (G_ANYEXT $tmp, $y))>; + def MyCombiner: GICombiner<"GenMyCombiner", [ InstTest0, - PatFragTest0 + PatFragTest0, + TypeOfProp ]>; diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/pattern-parsing.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/pattern-parsing.td index bc75b15233b551..fd41a7d1d72417 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/pattern-parsing.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/pattern-parsing.td @@ -297,6 +297,28 @@ def VariadicsOutTest : GICombineRule< (apply (COPY $a, (i32 0)), (COPY $b, (i32 0)))>; +// CHECK: (CombineRule name:TypeOfTest id:10 root:dst +// CHECK-NEXT: (MatchPats +// CHECK-NEXT: __TypeOfTest_match_0:(CodeGenInstructionPattern COPY operands:[$dst, $tmp]) +// CHECK-NEXT: __TypeOfTest_match_1:(CodeGenInstructionPattern G_ZEXT operands:[$tmp, $src]) +// CHECK-NEXT: ) +// CHECK-NEXT: (ApplyPats +// CHECK-NEXT: __TypeOfTest_apply_0:(CodeGenInstructionPattern G_MUL operands:[$dst, (GITypeOf<$src> 0), (GITypeOf<$dst> -1)]) +// CHECK-NEXT: ) +// CHECK-NEXT: (OperandTable MatchPats +// CHECK-NEXT: dst -> __TypeOfTest_match_0 +// CHECK-NEXT: src -> +// CHECK-NEXT: tmp -> __TypeOfTest_match_1 +// CHECK-NEXT: ) +// CHECK-NEXT: (OperandTable ApplyPats +// CHECK-NEXT: dst -> __TypeOfTest_apply_0 +// CHECK-NEXT: ) +// CHECK-NEXT: ) +def TypeOfTest : GICombineRule< + (defs root:$dst), + (match (COPY $dst, $tmp), + (G_ZEXT $tmp, $src)), + (apply (G_MUL $dst, (GITypeOf<"$src"> 0), (GITypeOf<"$dst"> -1)))>; def MyCombiner: GICombiner<"GenMyCombiner", [ WipOpcodeTest0, @@ -308,5 +330,6 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ PatFragTest0, PatFragTest1, VariadicsInTest, - VariadicsOutTest + VariadicsOutTest, + TypeOfTest ]>; diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/typeof-errors.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/typeof-errors.td new file mode 100644 index 00000000000000..6040d6def44976 --- /dev/null +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/typeof-errors.td @@ -0,0 +1,72 @@ +// RUN: not llvm-tblgen -I %p/../../../include -gen-global-isel-combiner \ +// RUN: -combiners=MyCombiner %s 2>&1| \ +// RUN: FileCheck %s -implicit-check-not=error: + +include "llvm/Target/Target.td" +include "llvm/Target/GlobalISel/Combine.td" + +def MyTargetISA : InstrInfo; +def MyTarget : Target { let InstructionSet = MyTargetISA; } + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand name format 'unknown' in GITypeOf: expected '$' followed by an operand name +def NoDollarSign : GICombineRule< + (defs root:$dst), + (match (G_ZEXT $dst, $src)), + (apply (G_ANYEXT $dst, (GITypeOf<"unknown"> 0)))>; + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: 'unknown' ('GITypeOf<$unknown>') does not refer to a matched operand! 
+def UnknownOperand : GICombineRule< + (defs root:$dst), + (match (G_ZEXT $dst, $src)), + (apply (G_ANYEXT $dst, (GITypeOf<"$unknown"> 0)))>; + +// CHECK: :[[@LINE+2]]:{{[0-9]+}}: error: GISpecialType is not supported in 'match' patterns +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: note: operand 1 of '__UseInMatch_match_0' has type 'GITypeOf<$dst>' +def UseInMatch : GICombineRule< + (defs root:$dst), + (match (G_ZEXT $dst, (GITypeOf<"$dst"> 0))), + (apply (G_ANYEXT $dst, (i32 0)))>; + +// CHECK: :[[@LINE+3]]:{{[0-9]+}}: error: GISpecialType is not supported in GICombinePatFrag +// CHECK: :[[@LINE+2]]:{{[0-9]+}}: note: operand 1 of '__PFWithTypeOF_alt0_pattern_0' has type 'GITypeOf<$dst> +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Could not parse GICombinePatFrag 'PFWithTypeOF' +def PFWithTypeOF: GICombinePatFrag< + (outs $dst), (ins), + [(pattern (G_ANYEXT $dst, (GITypeOf<"$dst"> 0)))]>; + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Failed to parse pattern: '(PFWithTypeOF ?:$dst)' +def UseInPF: GICombineRule< + (defs root:$dst), + (match (PFWithTypeOF $dst)), + (apply (G_ANYEXT $dst, (i32 0)))>; + +// CHECK: :[[@LINE+2]]:{{[0-9]+}}: error: GISpecialType is not supported in 'match' patterns +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: note: operand 1 of '__InferredUseInMatch_match_0' has type 'GITypeOf<$dst>' +def InferredUseInMatch : GICombineRule< + (defs root:$dst), + (match (G_ZEXT $dst, $src)), + (apply (G_ANYEXT $dst, GITypeOf<"$dst">:$src))>; + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: conflicting types for operand 'src': first seen with 'i32' in '__InferenceConflict_match_0, now seen with 'GITypeOf<$dst>' in '__InferenceConflict_apply_0' +def InferenceConflict : GICombineRule< + (defs root:$dst), + (match (G_ZEXT $dst, i32:$src)), + (apply (G_ANYEXT $dst, GITypeOf<"$dst">:$src))>; + +// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: 'tmp' ('GITypeOf<$tmp>') does not refer to a matched operand! 
+def TypeOfApplyTmp : GICombineRule<
+  (defs root:$dst),
+  (match (G_ZEXT $dst, $src)),
+  (apply (G_ANYEXT $dst, i32:$tmp),
+         (G_ANYEXT $tmp, (GITypeOf<"$tmp"> 0)))>;
+
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Failed to parse one or more rules
+def MyCombiner: GICombiner<"GenMyCombiner", [
+  NoDollarSign,
+  UnknownOperand,
+  UseInMatch,
+  UseInPF,
+  InferredUseInMatch,
+  InferenceConflict,
+  TypeOfApplyTmp
+]>;
diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
index 7992cb4362a171..0c7b33a7b9d889 100644
--- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
@@ -73,6 +73,8 @@ constexpr StringLiteral CXXApplyPrefix = "GICXXCustomAction_CombineApply";
 constexpr StringLiteral CXXPredPrefix = "GICXXPred_MI_Predicate_";
 constexpr StringLiteral PatFragClassName = "GICombinePatFrag";
 constexpr StringLiteral BuiltinInstClassName = "GIBuiltinInst";
+constexpr StringLiteral SpecialTyClassName = "GISpecialType";
+constexpr StringLiteral TypeOfClassName = "GITypeOf";

 std::string getIsEnabledPredicateEnumName(unsigned CombinerRuleID) {
   return "GICXXPred_Simple_IsRule" + to_string(CombinerRuleID) + "Enabled";
@@ -123,11 +125,6 @@ template auto values(Container &&C) {
   return map_range(C, [](auto &Entry) -> auto & { return Entry.second; });
 }

-LLTCodeGen getLLTCodeGenFromRecord(const Record *Ty) {
-  assert(Ty->isSubClassOf("ValueType"));
-  return LLTCodeGen(*MVTToLLT(getValueType(Ty)));
-}
-
 //===- MatchData Handling -------------------------------------------------===//

 /// Represents MatchData defined by the match stage and required by the apply
@@ -292,6 +289,116 @@ class CXXPredicateCode {
 CXXPredicateCode::CXXPredicateCodePool CXXPredicateCode::AllCXXMatchCode;
 CXXPredicateCode::CXXPredicateCodePool CXXPredicateCode::AllCXXApplyCode;

+//===- PatternType --------------------------------------------------------===//
+
+/// Represents the type of a Pattern Operand.
+///
+/// Types have two forms:
+/// - LLTs, which are straightforward.
+/// - Special types, e.g.
GITypeOf +class PatternType { +public: + PatternType() = default; + PatternType(const Record *R) : R(R) {} + + bool isValidType() const { return !R || isLLT() || isSpecial(); } + + bool isLLT() const { return R && R->isSubClassOf("ValueType"); } + bool isSpecial() const { return R && R->isSubClassOf(SpecialTyClassName); } + bool isTypeOf() const { return R && R->isSubClassOf(TypeOfClassName); } + + StringRef getTypeOfOpName() const; + LLTCodeGen getLLTCodeGen() const; + + bool checkSemantics(ArrayRef DiagLoc) const; + + LLTCodeGenOrTempType getLLTCodeGenOrTempType(RuleMatcher &RM) const; + + explicit operator bool() const { return R != nullptr; } + + bool operator==(const PatternType &Other) const; + bool operator!=(const PatternType &Other) const { return !operator==(Other); } + + std::string str() const; + +private: + StringRef getRawOpName() const { return R->getValueAsString("OpName"); } + + const Record *R = nullptr; +}; + +StringRef PatternType::getTypeOfOpName() const { + assert(isTypeOf()); + StringRef Name = getRawOpName(); + Name.consume_front("$"); + return Name; +} + +LLTCodeGen PatternType::getLLTCodeGen() const { + assert(isLLT()); + return *MVTToLLT(getValueType(R)); +} + +LLTCodeGenOrTempType +PatternType::getLLTCodeGenOrTempType(RuleMatcher &RM) const { + assert(isValidType()); + + if (isLLT()) + return getLLTCodeGen(); + + assert(isTypeOf()); + auto &OM = RM.getOperandMatcher(getTypeOfOpName()); + return OM.getTempTypeIdx(RM); +} + +bool PatternType::checkSemantics(ArrayRef DiagLoc) const { + if (!isTypeOf()) + return true; + + auto RawOpName = getRawOpName(); + if (RawOpName.starts_with("$")) + return true; + + PrintError(DiagLoc, "invalid operand name format '" + RawOpName + "' in " + + TypeOfClassName + + ": expected '$' followed by an operand name"); + return false; +} + +bool PatternType::operator==(const PatternType &Other) const { + if (R == Other.R) { + if (R && R->getName() != Other.R->getName()) { + dbgs() << "Same ptr but: " << R->getName() << " and " + << Other.R->getName() << "?\n"; + assert(false); + } + return true; + } + + if (isTypeOf() && Other.isTypeOf()) + return getTypeOfOpName() == Other.getTypeOfOpName(); + + return false; +} + +std::string PatternType::str() const { + if (!R) + return ""; + + if (!isValidType()) + return ""; + + if (isLLT()) + return R->getName().str(); + + assert(isSpecial()); + + if (isTypeOf()) + return (TypeOfClassName + "<$" + getTypeOfOpName() + ">").str(); + + llvm_unreachable("Unknown type!"); +} + //===- Pattern Base Class -------------------------------------------------===// /// Base class for all patterns that can be written in an `apply`, `match` or @@ -499,13 +606,15 @@ class InstructionOperand { public: using IntImmTy = int64_t; - InstructionOperand(IntImmTy Imm, StringRef Name, const Record *Type) + InstructionOperand(IntImmTy Imm, StringRef Name, PatternType Type) : Value(Imm), Name(insertStrRef(Name)), Type(Type) { - assert(!Type || Type->isSubClassOf("ValueType")); + assert(Type.isValidType()); } - InstructionOperand(StringRef Name, const Record *Type) - : Name(insertStrRef(Name)), Type(Type) {} + InstructionOperand(StringRef Name, PatternType Type) + : Name(insertStrRef(Name)), Type(Type) { + assert(Type.isValidType()); + } bool isNamedImmediate() const { return hasImmValue() && isNamedOperand(); } @@ -527,11 +636,12 @@ class InstructionOperand { void setIsDef(bool Value = true) { Def = Value; } bool isDef() const { return Def; } - void setType(const Record *R) { - assert((!Type || (Type == R)) && "Overwriting 
type!"); - Type = R; + void setType(PatternType NewType) { + assert((!Type || (Type == NewType)) && "Overwriting type!"); + assert(NewType.isValidType()); + Type = NewType; } - const Record *getType() const { return Type; } + PatternType getType() const { return Type; } std::string describe() const { if (!hasImmValue()) @@ -547,11 +657,11 @@ class InstructionOperand { OS << ""; bool NeedsColon = true; - if (const Record *Ty = getType()) { + if (Type) { if (hasImmValue()) - OS << "(" << Ty->getName() << " " << getImmValue() << ")"; + OS << "(" << Type.str() << " " << getImmValue() << ")"; else - OS << Ty->getName(); + OS << Type.str(); } else if (hasImmValue()) OS << getImmValue(); else @@ -566,7 +676,7 @@ class InstructionOperand { private: std::optional Value; StringRef Name; - const Record *Type = nullptr; + PatternType Type; bool Def = false; }; @@ -622,6 +732,10 @@ class InstructionPattern : public Pattern { virtual StringRef getInstName() const = 0; + /// Diagnoses all uses of special types in this Pattern and returns true if at + /// least one diagnostic was emitted. + bool diagnoseAllSpecialTypes(ArrayRef Loc, Twine Msg) const; + void reportUnreachable(ArrayRef Locs) const; virtual bool checkSemantics(ArrayRef Loc); @@ -633,6 +747,20 @@ class InstructionPattern : public Pattern { SmallVector Operands; }; +bool InstructionPattern::diagnoseAllSpecialTypes(ArrayRef Loc, + Twine Msg) const { + bool HasDiag = false; + for (const auto &[Idx, Op] : enumerate(operands())) { + if (Op.getType().isSpecial()) { + PrintError(Loc, Msg); + PrintNote(Loc, "operand " + Twine(Idx) + " of '" + getName() + + "' has type '" + Op.getType().str() + "'"); + HasDiag = true; + } + } + return HasDiag; +} + void InstructionPattern::reportUnreachable(ArrayRef Locs) const { PrintError(Locs, "pattern '" + getName() + "' ('" + getInstName() + "') is unreachable from the pattern root!"); @@ -829,17 +957,20 @@ unsigned CodeGenInstructionPattern::getNumInstOperands() const { /// It infers the type of each operand, check it's consistent with the known /// type of the operand, and then sets all of the types in all operands in /// setAllOperandTypes. +/// +/// It also handles verifying correctness of special types. 
class OperandTypeChecker { public: OperandTypeChecker(ArrayRef DiagLoc) : DiagLoc(DiagLoc) {} - bool check(InstructionPattern *P); + bool check(InstructionPattern *P, + std::function VerifyTypeOfOperand); void setAllOperandTypes(); private: struct OpTypeInfo { - const Record *Type = nullptr; + PatternType Type; InstructionPattern *TypeSrc = nullptr; }; @@ -849,16 +980,26 @@ class OperandTypeChecker { SmallVector Pats; }; -bool OperandTypeChecker::check(InstructionPattern *P) { +bool OperandTypeChecker::check( + InstructionPattern *P, + std::function VerifyTypeOfOperand) { Pats.push_back(P); - for (auto &Op : P->named_operands()) { - const Record *Ty = Op.getType(); + for (auto &Op : P->operands()) { + const auto Ty = Op.getType(); if (!Ty) continue; - auto &Info = Types[Op.getOperandName()]; + if (!Ty.checkSemantics(DiagLoc)) + return false; + + if (Ty.isTypeOf() && !VerifyTypeOfOperand(Ty)) + return false; + if (!Op.isNamedOperand()) + continue; + + auto &Info = Types[Op.getOperandName()]; if (!Info.Type) { Info.Type = Ty; Info.TypeSrc = P; @@ -868,9 +1009,9 @@ bool OperandTypeChecker::check(InstructionPattern *P) { if (Info.Type != Ty) { PrintError(DiagLoc, "conflicting types for operand '" + Op.getOperandName() + "': first seen with '" + - Info.Type->getName() + "' in '" + + Info.Type.str() + "' in '" + Info.TypeSrc->getName() + ", now seen with '" + - Ty->getName() + "' in '" + P->getName() + "'"); + Ty.str() + "' in '" + P->getName() + "'"); return false; } } @@ -1058,7 +1199,12 @@ bool PatFrag::checkSemantics() { PatFragClassName); return false; case Pattern::K_CXX: + continue; case Pattern::K_CodeGenInstruction: + if (cast(Pat.get())->diagnoseAllSpecialTypes( + Def.getLoc(), SpecialTyClassName + " is not supported in " + + PatFragClassName)) + return false; continue; case Pattern::K_PatFrag: // TODO: It's just that the emitter doesn't handle it but technically @@ -1142,12 +1288,16 @@ bool PatFrag::checkSemantics() { // TODO: find unused params + const auto CheckTypeOf = [&](const PatternType &) -> bool { + llvm_unreachable("GITypeOf should have been rejected earlier!"); + }; + // Now, typecheck all alternatives. for (auto &Alt : Alts) { OperandTypeChecker OTC(Def.getLoc()); for (auto &Pat : Alt.Pats) { if (auto *IP = dyn_cast(Pat.get())) { - if (!OTC.check(IP)) + if (!OTC.check(IP, CheckTypeOf)) return false; } } @@ -1954,21 +2104,49 @@ bool CombineRuleBuilder::hasEraseRoot() const { bool CombineRuleBuilder::typecheckPatterns() { OperandTypeChecker OTC(RuleDef.getLoc()); + const auto CheckMatchTypeOf = [&](const PatternType &) -> bool { + // We'll reject those after we're done inferring + return true; + }; + for (auto &Pat : values(MatchPats)) { if (auto *IP = dyn_cast(Pat.get())) { - if (!OTC.check(IP)) + if (!OTC.check(IP, CheckMatchTypeOf)) return false; } } + const auto CheckApplyTypeOf = [&](const PatternType &Ty) { + // GITypeOf<"$x"> can only be used if "$x" is a matched operand. + const auto OpName = Ty.getTypeOfOpName(); + if (MatchOpTable.lookup(OpName).Found) + return true; + + PrintError("'" + OpName + "' ('" + Ty.str() + + "') does not refer to a matched operand!"); + return false; + }; + for (auto &Pat : values(ApplyPats)) { if (auto *IP = dyn_cast(Pat.get())) { - if (!OTC.check(IP)) + if (!OTC.check(IP, CheckApplyTypeOf)) return false; } } OTC.setAllOperandTypes(); + + // Always check this after in case inference adds some special types to the + // match patterns. 
+ for (auto &Pat : values(MatchPats)) { + if (auto *IP = dyn_cast(Pat.get())) { + if (IP->diagnoseAllSpecialTypes( + RuleDef.getLoc(), + SpecialTyClassName + " is not supported in 'match' patterns")) { + return false; + } + } + } return true; } @@ -2461,10 +2639,12 @@ bool CombineRuleBuilder::parseInstructionPatternOperand( if (DagOp->getNumArgs() != 1) return ParseErr(); - Record *ImmTy = DagOp->getOperatorAsDef(RuleDef.getLoc()); - if (!ImmTy->isSubClassOf("ValueType")) { + const Record *TyDef = DagOp->getOperatorAsDef(RuleDef.getLoc()); + PatternType ImmTy(TyDef); + if (!ImmTy.isValidType()) { PrintError("cannot parse immediate '" + OpInit->getAsUnquotedString() + - "', '" + ImmTy->getName() + "' is not a ValueType!"); + "', '" + TyDef->getName() + "' is not a ValueType or " + + SpecialTyClassName); return false; } @@ -2491,12 +2671,13 @@ bool CombineRuleBuilder::parseInstructionPatternOperand( return false; } const Record *Def = DefI->getDef(); - if (!Def->isSubClassOf("ValueType")) { + PatternType Ty(Def); + if (!Ty.isValidType()) { PrintError("invalid operand type: '" + Def->getName() + "' is not a ValueType"); return false; } - IP.addOperand(OpName->getAsUnquotedString(), Def); + IP.addOperand(OpName->getAsUnquotedString(), Ty); return true; } @@ -2823,8 +3004,8 @@ bool CombineRuleBuilder::emitPatFragMatchPattern( StringRef PFName = PF.getName(); PrintWarning("impossible type constraints: operand " + Twine(PIdx) + " of '" + PFP.getName() + "' has type '" + - ArgOp.getType()->getName() + "', but '" + PFName + - "' constrains it to '" + O.getType()->getName() + "'"); + ArgOp.getType().str() + "', but '" + PFName + + "' constrains it to '" + O.getType().str() + "'"); if (ArgOp.isNamedOperand()) PrintNote("operand " + Twine(PIdx) + " of '" + PFP.getName() + "' is '" + ArgOp.getOperandName() + "'"); @@ -3055,17 +3236,18 @@ bool CombineRuleBuilder::emitInstructionApplyPattern( // This is a brand new register. TempRegID = M.allocateTempRegID(); OperandToTempRegID[OpName] = TempRegID; - const Record *Ty = Op.getType(); + const auto Ty = Op.getType(); if (!Ty) { PrintError("def of a new register '" + OpName + "' in the apply patterns must have a type"); return false; } + declareTempRegExpansion(CE, TempRegID, OpName); // Always insert the action at the beginning, otherwise we may end up // using the temp reg before it's available. M.insertAction( - M.actions_begin(), getLLTCodeGenFromRecord(Ty), TempRegID); + M.actions_begin(), Ty.getLLTCodeGenOrTempType(M), TempRegID); } DstMI.addRenderer(TempRegID); @@ -3088,7 +3270,7 @@ bool CombineRuleBuilder::emitCodeGenInstructionApplyImmOperand( // G_CONSTANT is a special case and needs a CImm though so this is likely a // mistake. const bool isGConstant = P.is("G_CONSTANT"); - const Record *Ty = O.getType(); + const auto Ty = O.getType(); if (!Ty) { if (isGConstant) { PrintError("'G_CONSTANT' immediate must be typed!"); @@ -3101,16 +3283,17 @@ bool CombineRuleBuilder::emitCodeGenInstructionApplyImmOperand( return true; } - LLTCodeGen LLT = getLLTCodeGenFromRecord(Ty); + auto ImmTy = Ty.getLLTCodeGenOrTempType(M); + if (isGConstant) { - DstMI.addRenderer(O.getImmValue(), LLT); + DstMI.addRenderer(O.getImmValue(), ImmTy); return true; } unsigned TempRegID = M.allocateTempRegID(); // Ensure MakeTempReg & the BuildConstantAction occur at the beginning. 
- auto InsertIt = - M.insertAction(M.actions_begin(), LLT, TempRegID); + auto InsertIt = M.insertAction(M.actions_begin(), + ImmTy, TempRegID); M.insertAction(++InsertIt, TempRegID, O.getImmValue()); DstMI.addRenderer(TempRegID); return true; @@ -3227,8 +3410,14 @@ bool CombineRuleBuilder::emitCodeGenInstructionMatchPattern( // Always emit a check for unnamed operands. if (OpName.empty() || !M.getOperandMatcher(OpName).contains()) { - if (const Record *Ty = RemappedO.getType()) - OM.addPredicate(getLLTCodeGenFromRecord(Ty)); + if (const auto Ty = RemappedO.getType()) { + // TODO: We could support GITypeOf here on the condition that the + // OperandMatcher exists already. Though it's clunky to make this work + // and isn't all that useful so it's just rejected in typecheckPatterns + // at this time. + assert(Ty.isLLT() && "Only LLTs are supported in match patterns!"); + OM.addPredicate(Ty.getLLTCodeGen()); + } } // Stop here if the operand is a def, or if it had no name. diff --git a/llvm/utils/TableGen/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/GlobalISelMatchTable.cpp index 9a4a375f34bdb9..6ec85269e6e20d 100644 --- a/llvm/utils/TableGen/GlobalISelMatchTable.cpp +++ b/llvm/utils/TableGen/GlobalISelMatchTable.cpp @@ -822,6 +822,15 @@ const OperandMatcher &RuleMatcher::getPhysRegOperandMatcher(Record *Reg) const { return *I->second; } +OperandMatcher &RuleMatcher::getOperandMatcher(StringRef Name) { + const auto &I = DefinedOperands.find(Name); + + if (I == DefinedOperands.end()) + PrintFatalError(SrcLoc, "Operand " + Name + " was not declared in matcher"); + + return *I->second; +} + const OperandMatcher &RuleMatcher::getOperandMatcher(StringRef Name) const { const auto &I = DefinedOperands.find(Name); @@ -1081,6 +1090,17 @@ void RecordNamedOperandMatcher::emitPredicateOpcodes(MatchTable &Table, << MatchTable::Comment("Name : " + Name) << MatchTable::LineBreak; } +//===- RecordRegisterType ------------------------------------------===// + +void RecordRegisterType::emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const { + assert(Idx < 0 && "Temp types always have negative indexes!"); + Table << MatchTable::Opcode("GIM_RecordRegType") << MatchTable::Comment("MI") + << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op") + << MatchTable::IntValue(OpIdx) << MatchTable::Comment("TempTypeIdx") + << MatchTable::IntValue(Idx) << MatchTable::LineBreak; +} + //===- ComplexPatternOperandMatcher ---------------------------------------===// void ComplexPatternOperandMatcher::emitPredicateOpcodes( @@ -1196,6 +1216,18 @@ std::string OperandMatcher::getOperandExpr(unsigned InsnVarID) const { unsigned OperandMatcher::getInsnVarID() const { return Insn.getInsnVarID(); } +TempTypeIdx OperandMatcher::getTempTypeIdx(RuleMatcher &Rule) { + if (TTIdx >= 0) { + // Temp type index not assigned yet, so assign one and add the necessary + // predicate. 
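+    // (TTIdx is zero-initialized and valid temp type indices are strictly
+    // negative, so `TTIdx >= 0` means "no index assigned yet".)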
+ TTIdx = Rule.getNextTempTypeIdx(); + assert(TTIdx < 0); + addPredicate(TTIdx); + return TTIdx; + } + return TTIdx; +} + void OperandMatcher::emitPredicateOpcodes(MatchTable &Table, RuleMatcher &Rule) { if (!Optimized) { @@ -2092,9 +2124,7 @@ void MakeTempRegisterAction::emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const { Table << MatchTable::Opcode("GIR_MakeTempReg") << MatchTable::Comment("TempRegID") << MatchTable::IntValue(TempRegID) - << MatchTable::Comment("TypeID") - << MatchTable::NamedValue(Ty.getCxxEnumValue()) - << MatchTable::LineBreak; + << MatchTable::Comment("TypeID") << Ty << MatchTable::LineBreak; } } // namespace gi diff --git a/llvm/utils/TableGen/GlobalISelMatchTable.h b/llvm/utils/TableGen/GlobalISelMatchTable.h index 5608bab482bfd3..364f2a1ec725d5 100644 --- a/llvm/utils/TableGen/GlobalISelMatchTable.h +++ b/llvm/utils/TableGen/GlobalISelMatchTable.h @@ -273,6 +273,40 @@ extern std::set KnownTypes; /// MVTs that don't map cleanly to an LLT (e.g., iPTR, *any, ...). std::optional MVTToLLT(MVT::SimpleValueType SVT); +using TempTypeIdx = int64_t; +class LLTCodeGenOrTempType { +public: + LLTCodeGenOrTempType(const LLTCodeGen &LLT) : Data(LLT) {} + LLTCodeGenOrTempType(TempTypeIdx TempTy) : Data(TempTy) {} + + bool isLLTCodeGen() const { return std::holds_alternative(Data); } + bool isTempTypeIdx() const { + return std::holds_alternative(Data); + } + + const LLTCodeGen &getLLTCodeGen() const { + assert(isLLTCodeGen()); + return std::get(Data); + } + + TempTypeIdx getTempTypeIdx() const { + assert(isTempTypeIdx()); + return std::get(Data); + } + +private: + std::variant Data; +}; + +inline MatchTable &operator<<(MatchTable &Table, + const LLTCodeGenOrTempType &Ty) { + if (Ty.isLLTCodeGen()) + Table << MatchTable::NamedValue(Ty.getLLTCodeGen().getCxxEnumValue()); + else + Table << MatchTable::IntValue(Ty.getTempTypeIdx()); + return Table; +} + //===- Matchers -----------------------------------------------------------===// class Matcher { public: @@ -459,6 +493,9 @@ class RuleMatcher : public Matcher { /// ID for the next temporary register ID allocated with allocateTempRegID() unsigned NextTempRegID; + /// ID for the next recorded type. Starts at -1 and counts down. + TempTypeIdx NextTempTypeIdx = -1; + // HwMode predicate index for this rule. -1 if no HwMode. int HwModeIdx = -1; @@ -498,6 +535,8 @@ class RuleMatcher : public Matcher { RuleMatcher(RuleMatcher &&Other) = default; RuleMatcher &operator=(RuleMatcher &&Other) = default; + TempTypeIdx getNextTempTypeIdx() { return NextTempTypeIdx--; } + uint64_t getRuleID() const { return RuleID; } InstructionMatcher &addInstructionMatcher(StringRef SymbolicName); @@ -602,6 +641,7 @@ class RuleMatcher : public Matcher { } InstructionMatcher &getInstructionMatcher(StringRef SymbolicName) const; + OperandMatcher &getOperandMatcher(StringRef Name); const OperandMatcher &getOperandMatcher(StringRef Name) const; const OperandMatcher &getPhysRegOperandMatcher(Record *) const; @@ -762,6 +802,7 @@ class PredicateMatcher { OPM_RegBank, OPM_MBB, OPM_RecordNamedOperand, + OPM_RecordRegType, }; protected: @@ -963,6 +1004,30 @@ class RecordNamedOperandMatcher : public OperandPredicateMatcher { RuleMatcher &Rule) const override; }; +/// Generates code to store a register operand's type into the set of temporary +/// LLTs. 
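+/// Temp type indices are negative, handed out by
+/// RuleMatcher::getNextTempTypeIdx counting down from -1, so they can never
+/// collide with the non-negative IDs of statically known types. Actions such
+/// as GIR_MakeTempReg can later refer to the recorded type through that
+/// index.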
+class RecordRegisterType : public OperandPredicateMatcher { +protected: + TempTypeIdx Idx; + +public: + RecordRegisterType(unsigned InsnVarID, unsigned OpIdx, TempTypeIdx Idx) + : OperandPredicateMatcher(OPM_RecordRegType, InsnVarID, OpIdx), Idx(Idx) { + } + + static bool classof(const PredicateMatcher *P) { + return P->getKind() == OPM_RecordRegType; + } + + bool isIdentical(const PredicateMatcher &B) const override { + return OperandPredicateMatcher::isIdentical(B) && + Idx == cast(&B)->Idx; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override; +}; + /// Generates code to check that an operand is a particular target constant. class ComplexPatternOperandMatcher : public OperandPredicateMatcher { protected: @@ -1169,6 +1234,8 @@ class OperandMatcher : public PredicateListMatcher { /// countRendererFns(). unsigned AllocatedTemporariesBaseID; + TempTypeIdx TTIdx = 0; + public: OperandMatcher(InstructionMatcher &Insn, unsigned OpIdx, const std::string &SymbolicName, @@ -1196,6 +1263,11 @@ class OperandMatcher : public PredicateListMatcher { unsigned getOpIdx() const { return OpIdx; } unsigned getInsnVarID() const; + /// If this OperandMatcher has not been assigned a TempTypeIdx yet, assigns it + /// one and adds a `RecordRegisterType` predicate to this matcher. If one has + /// already been assigned, simply returns it. + TempTypeIdx getTempTypeIdx(RuleMatcher &Rule); + std::string getOperandExpr(unsigned InsnVarID) const; InstructionMatcher &getInstructionMatcher() const { return Insn; } @@ -1955,15 +2027,16 @@ class ImmRenderer : public OperandRenderer { protected: unsigned InsnID; int64_t Imm; - std::optional CImmLLT; + std::optional CImmLLT; public: ImmRenderer(unsigned InsnID, int64_t Imm) : OperandRenderer(OR_Imm), InsnID(InsnID), Imm(Imm) {} - ImmRenderer(unsigned InsnID, int64_t Imm, const LLTCodeGen &CImmLLT) + ImmRenderer(unsigned InsnID, int64_t Imm, const LLTCodeGenOrTempType &CImmLLT) : OperandRenderer(OR_Imm), InsnID(InsnID), Imm(Imm), CImmLLT(CImmLLT) { - KnownTypes.insert(CImmLLT); + if (CImmLLT.isLLTCodeGen()) + KnownTypes.insert(CImmLLT.getLLTCodeGen()); } static bool classof(const OperandRenderer *R) { @@ -1976,8 +2049,7 @@ class ImmRenderer : public OperandRenderer { "ConstantInt immediate are only for combiners!"); Table << MatchTable::Opcode("GIR_AddCImm") << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) - << MatchTable::Comment("Type") - << MatchTable::NamedValue(CImmLLT->getCxxEnumValue()) + << MatchTable::Comment("Type") << *CImmLLT << MatchTable::Comment("Imm") << MatchTable::IntValue(Imm) << MatchTable::LineBreak; } else { @@ -2290,13 +2362,14 @@ class ConstrainOperandToRegClassAction : public MatchAction { /// instructions together. 
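+/// Note: with LLTCodeGenOrTempType, the temporary register's type may either
+/// be known statically (an LLTCodeGen) or be a TempTypeIdx recorded from a
+/// matched operand and only resolved when the rule fires.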
class MakeTempRegisterAction : public MatchAction { private: - LLTCodeGen Ty; + LLTCodeGenOrTempType Ty; unsigned TempRegID; public: - MakeTempRegisterAction(const LLTCodeGen &Ty, unsigned TempRegID) + MakeTempRegisterAction(const LLTCodeGenOrTempType &Ty, unsigned TempRegID) : MatchAction(AK_MakeTempReg), Ty(Ty), TempRegID(TempRegID) { - KnownTypes.insert(Ty); + if (Ty.isLLTCodeGen()) + KnownTypes.insert(Ty.getLLTCodeGen()); } static bool classof(const MatchAction *A) { From bbd61d807f86e0b043976eb39a1b0ad13f306a9c Mon Sep 17 00:00:00 2001 From: martinboehme Date: Tue, 31 Oct 2023 10:04:30 +0100 Subject: [PATCH 135/144] [clang][dataflow][NFC] Move `parseAll()` to TestingSupport and rename `parseFormulas()` (#70437) I'm working on a patch that will use this function from a different test. --- .../Analysis/FlowSensitive/SolverTest.cpp | 30 +++++-------------- .../Analysis/FlowSensitive/TestingSupport.cpp | 16 ++++++++++ .../Analysis/FlowSensitive/TestingSupport.h | 4 +++ 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp b/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp index a61e692088a871..71f6da93594e30 100644 --- a/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp @@ -25,6 +25,7 @@ using namespace clang; using namespace dataflow; using test::ConstraintContext; +using test::parseFormulas; using testing::_; using testing::AnyOf; using testing::Pair; @@ -33,21 +34,6 @@ using testing::UnorderedElementsAre; constexpr auto AssignedTrue = Solver::Result::Assignment::AssignedTrue; constexpr auto AssignedFalse = Solver::Result::Assignment::AssignedFalse; -std::vector parseAll(Arena &A, StringRef Lines) { - std::vector Result; - while (!Lines.empty()) { - auto [First, Rest] = Lines.split('\n'); - Lines = Rest; - if (First.trim().empty()) - continue; - if (auto F = A.parseFormula(First)) - Result.push_back(&*F); - else - ADD_FAILURE() << llvm::toString(F.takeError()); - } - return Result; -} - // Checks if the conjunction of `Vals` is satisfiable and returns the // corresponding result. 
Solver::Result solve(llvm::ArrayRef Vals) { @@ -277,7 +263,7 @@ TEST(SolverTest, IffWithUnits) { TEST(SolverTest, IffWithUnitsConflict) { Arena A; - auto Constraints = parseAll(A, R"( + auto Constraints = parseFormulas(A, R"( (V0 = V1) V0 !V1 @@ -287,7 +273,7 @@ TEST(SolverTest, IffWithUnitsConflict) { TEST(SolverTest, IffTransitiveConflict) { Arena A; - auto Constraints = parseAll(A, R"( + auto Constraints = parseFormulas(A, R"( (V0 = V1) (V1 = V2) V2 @@ -298,7 +284,7 @@ TEST(SolverTest, IffTransitiveConflict) { TEST(SolverTest, DeMorgan) { Arena A; - auto Constraints = parseAll(A, R"( + auto Constraints = parseFormulas(A, R"( (!(V0 | V1) = (!V0 & !V1)) (!(V2 & V3) = (!V2 | !V3)) )"); @@ -307,7 +293,7 @@ TEST(SolverTest, DeMorgan) { TEST(SolverTest, RespectsAdditionalConstraints) { Arena A; - auto Constraints = parseAll(A, R"( + auto Constraints = parseFormulas(A, R"( (V0 = V1) V0 !V1 @@ -317,7 +303,7 @@ TEST(SolverTest, RespectsAdditionalConstraints) { TEST(SolverTest, ImplicationIsEquivalentToDNF) { Arena A; - auto Constraints = parseAll(A, R"( + auto Constraints = parseFormulas(A, R"( !((V0 => V1) = (!V0 | V1)) )"); EXPECT_THAT(solve(Constraints), unsat()); @@ -325,7 +311,7 @@ TEST(SolverTest, ImplicationIsEquivalentToDNF) { TEST(SolverTest, ImplicationConflict) { Arena A; - auto Constraints = parseAll(A, R"( + auto Constraints = parseFormulas(A, R"( (V0 => V1) (V0 & !V1) )"); @@ -334,7 +320,7 @@ TEST(SolverTest, ImplicationConflict) { TEST(SolverTest, ReachedLimitsReflectsTimeouts) { Arena A; - auto Constraints = parseAll(A, R"( + auto Constraints = parseFormulas(A, R"( (!(V0 | V1) = (!V0 & !V1)) (!(V2 & V3) = (!V2 & !V3)) )"); diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp index 65c527ae63d2d7..e24ff25cb8292f 100644 --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/StringSet.h" #include "llvm/Support/Error.h" #include "llvm/Testing/Annotations/Annotations.h" +#include "gtest/gtest.h" #include #include #include @@ -218,3 +219,18 @@ const IndirectFieldDecl *test::findIndirectFieldDecl(ASTContext &ASTCtx, assert(Result != nullptr); return Result; } + +std::vector test::parseFormulas(Arena &A, StringRef Lines) { + std::vector Result; + while (!Lines.empty()) { + auto [First, Rest] = Lines.split('\n'); + Lines = Rest; + if (First.trim().empty()) + continue; + if (auto F = A.parseFormula(First)) + Result.push_back(&*F); + else + ADD_FAILURE() << llvm::toString(F.takeError()); + } + return Result; +} diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h index a8089d9b8c7a13..100d78378695d3 100644 --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h @@ -525,6 +525,10 @@ class ConstraintContext { } }; +/// Parses a list of formulas, separated by newlines, and returns them. +/// On parse errors, calls `ADD_FAILURE()` to fail the current test. 
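+///
+/// Example (mirroring the SolverTest usage above):
+///   Arena A;
+///   auto Constraints = parseFormulas(A, R"(
+///     (V0 = V1)
+///     V0
+///     !V1
+///   )");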
+std::vector parseFormulas(Arena &A, StringRef Lines); + } // namespace test } // namespace dataflow } // namespace clang From e66629501189b15f24810b7a65cd814f76a25e23 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Tue, 31 Oct 2023 09:06:21 +0000 Subject: [PATCH 136/144] [mlir][ArmSME] Support lowering masked vector.outerproduct ops to SME (#69604) This patch adds support for lowering masked outer products to SME. This is done in two stages. First, vector.outerproducts (both masked and non-masked) are rewritten to arm_sme.outerproducts. The arm_sme.outerproduct op is close to vector.outerproduct, but supports masking on the operands rather than the result. It also limits the cases it handles to things that could be (directly) lowered to SME. This currently requires that the source of the mask is a vector.create_mask op. E.g.: ```mlir %mask = vector.create_mask %dimA, %dimB : vector<[4]x[4]xi1> %result = vector.mask %mask { vector.outerproduct %vecA, %vecB : vector<[4]xf32>, vector<[4]xf32> } : vector<[4]x[4]xi1> -> vector<[4]x[4]xf32> ``` Is rewritten to: ``` %maskA = vector.create_mask %dimA : vector<[4]xi1> %maskB = vector.create_mask %dimB : vector<[4]xi1> %result = arm_sme.outerproduct %vecA, %vecB masks(%maskA, %maskB) : vector<[4]xf32>, vector<[4]xf32> ``` (The same rewrite works for non-masked vector.outerproducts too) The arm_sme.outerproduct can then be directly lowered to SME intrinsics. --- .../mlir/Dialect/ArmSME/IR/ArmSMEOps.td | 109 ++++++++++++++- .../VectorToArmSME/VectorToArmSME.cpp | 102 +++++++++++++- .../Transforms/LegalizeForLLVMExport.cpp | 66 ++++----- mlir/test/Dialect/ArmSME/invalid.mlir | 22 +++ mlir/test/Dialect/ArmSME/roundtrip.mlir | 44 ++++++ .../Dialect/ArmSME/vector-ops-to-llvm.mlir | 67 ++++++++- .../Dialect/ArmSME/vector-ops-to-sme.mlir | 96 +++++++++++++ .../CPU/ArmSME/test-outerproduct-f32.mlir | 82 ++++++++++- .../CPU/ArmSME/test-outerproduct-f64.mlir | 130 ++++++++++++++++-- 9 files changed, 654 insertions(+), 64 deletions(-) diff --git a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td index b30d0fdb866bd2..2f6e52ff2badbe 100644 --- a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td +++ b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td @@ -79,6 +79,27 @@ def ArmSME_TileSliceLayoutAttr : EnumAttr, + I32EnumAttrCase<"Sub", 1, "sub">, +]> { + let cppNamespace = "::mlir::arm_sme"; + let genSpecializedAttr = 0; +} + +/// An attribute that specifies how to combine a newly produced value with the +/// accumulator. This is similar to vector::CombiningKindAttr, but limited to +/// the functions that are valid for SME outer products. Add corresponds to a +/// MOPA and sub to a MOPS. +/// E.g. For f32: +/// FMOPA: https://developer.arm.com/documentation/ddi0602/2022-03/SME-Instructions/FMOPA--non-widening---Floating-point-outer-product-and-accumulate- +/// FMOPS: https://developer.arm.com/documentation/ddi0602/2022-03/SME-Instructions/FMOPS--non-widening---Floating-point-outer-product-and-subtract- +def ArmSME_CombiningKindAttr : EnumAttr { + let assemblyFormat = "`<` $value `>`"; + let defaultValue = "CombiningKind::Add"; +} + //===----------------------------------------------------------------------===// // ArmSME op definitions //===----------------------------------------------------------------------===// @@ -209,7 +230,7 @@ def ZeroOp : ArmSME_Op<"zero", [Pure]> { let results = (outs SMETile:$res); let description = [{ Initialise ZA with 0. 
This operation is convenient wrapper for the SME - `zero` intrinsic and instruction. + `zero` intrinsic and instruction. Example 1: Zero an 8-bit element ZA tile. @@ -561,4 +582,90 @@ def MoveTileSliceToVectorOp : ArmSME_Op<"move_tile_slice_to_vector", [Pure, }]; } +class HasMatchingMaskTypeConstraint : + OptionalTypesMatchWith< + "shape of `" # operand # "Mask` matches `" # operand # "`", + operand, operand # "Mask", + "::llvm::cast($_self).cloneWith({}, IntegerType::get($_ctxt, 1))">; + +class OuterProductResultTileTypeConstraint : + OptionalTypesMatchWith($_self);" + " int64_t size = vectorType.getDimSize(0);" + " return VectorType::get(" + " { size, size }, vectorType.getElementType(), { true, true });" + "}()">; + +def OuterProductOp : + ArmSME_Op<"outerproduct", [Pure, + AttrSizedOperandSegments, + AllTypesMatch<["lhs", "rhs"]>, + HasMatchingMaskTypeConstraint<"lhs">, + HasMatchingMaskTypeConstraint<"rhs">, + PredOpTrait< + "both `lhsMask` and `rhsMask` should be provided or neither", + CPred<"bool(getLhsMask()) == bool(getRhsMask())">>, + OuterProductResultTileTypeConstraint<"result">, + OuterProductResultTileTypeConstraint<"acc"> + ]> +{ + let summary = "Outer product with optional fused add/sub"; + + let description = [{ + This operation represents an outer product that fits within an SME tile. + All operands must be SVE vectors and the result a SME tile. Unlike + `vector.outerproduct` masking is on the operands (rather than the result), + which mirrors the SME instructions. + + Example 1: Unmasked outerproduct (without accumulator) + ```mlir + // Not specifying an accumulator implicitly zeros the destination tile. + %result = arm_sme.outerproduct $lhs, $rhs : vector<[4]xf32>, vector<[4]xf32> + ``` + + Example 2: Unmasked outerproduct (with accumulator) + ```mlir + %result = arm_sme.outerproduct $lhs, $rhs acc($accumulator) + : vector<[4]xf32>, vector<[4]xf32> + ``` + + Example 3: Masked outerproduct + ```mlir + %result = arm_sme.outerproduct $lhs, $rhs masks($lhsMask, $rhsMask) + : vector<[4]xf32>, vector<[4]xf32> + ``` + + Example 4: Masked outerproduct (with accumulator) + ```mlir + %result = arm_sme.outerproduct $lhs, $rhs acc($accumulator) masks($lhsMask, $rhsMask) + : vector<[4]xf32>, vector<[4]xf32> + ``` + }]; + + let arguments = (ins + SVEVector:$lhs, SVEVector:$rhs, + Optional:$lhsMask, + Optional:$rhsMask, + Optional: $acc, + ArmSME_CombiningKindAttr:$kind); + let results = (outs SMETile:$result); + + let assemblyFormat = [{ + $lhs `,` $rhs + oilist( + `kind` `` $kind + | `acc` `` `(` $acc `)` + | `masks` `` `(` $lhsMask `,` $rhsMask `)` + ) attr-dict `:` type($lhs) `,` type($rhs) + }]; + + let extraClassDeclaration = [{ + VectorType getLhsType() { return llvm::cast(getLhs().getType()); } + VectorType getRhsType() { return llvm::cast(getRhs().getType()); } + VectorType getResultType() { return llvm::cast(getResult().getType()); } + }]; +} + #endif // ARMSME_OPS diff --git a/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp b/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp index d06eb4f5b01c95..b60c21e2ced7a8 100644 --- a/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp +++ b/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp @@ -427,6 +427,105 @@ struct TransposeOpToArmSMELowering } }; +/// Conversion pattern for vector.outerproduct. +/// +/// If the vector.outerproduct is masked (and the mask is from a +/// vector.create_mask), then the mask is decomposed into two 1-D masks for the +/// operands. 
+/// +/// Example: +/// +/// %mask = vector.create_mask %dimA, %dimB : vector<[4]x[4]xi1> +/// %result = vector.mask %mask { +/// vector.outerproduct %vecA, %vecB +/// : vector<[4]xf32>, vector<[4]xf32> +/// } : vector<[4]x[4]xi1> -> vector<[4]x[4]xf32> +/// +/// is converted to: +/// +/// %maskA = vector.create_mask %dimA : vector<[4]xi1> +/// %maskB = vector.create_mask %dimB : vector<[4]xi1> +/// %result = arm_sme.outerproduct %vecA, %vecB masks(%maskA, %maskB) +/// : vector<[4]xf32>, vector<[4]xf32> +/// +/// Unmasked outerproducts can be directly replaced with the arm_sme op. +/// +/// Example: +/// +/// %result = vector.outerproduct %vecA, %vecB +/// : vector<[4]xf32>, vector<[4]xf32> +/// +/// is converted to: +/// +/// %result = arm_sme.outerproduct %vecA, %vecB +/// : vector<[4]xf32>, vector<[4]xf32> +/// +struct VectorOuterProductToArmSMELowering + : public OpRewritePattern { + + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::OuterProductOp outerProductOp, + PatternRewriter &rewriter) const override { + + // We don't yet support lowering AXPY operations to SME. These could be + // lowered by masking out all but the first element of the LHS. + if (!isa(outerProductOp.getOperandTypeRHS())) + return outerProductOp.emitError("AXPY operations not supported"); + + if (!arm_sme::isValidSMETileVectorType( + outerProductOp.getResultVectorType())) + return outerProductOp.emitError( + "outer product does not fit into SME tile"); + + auto kind = outerProductOp.getKind(); + if (kind != vector::CombiningKind::ADD) + return outerProductOp.emitError( + "unsupported kind (lowering to SME only supports ADD at the moment)"); + + Value lhsMask = {}; + Value rhsMask = {}; + Operation *rootOp = outerProductOp; + auto loc = outerProductOp.getLoc(); + if (outerProductOp.isMasked()) { + auto maskOp = outerProductOp.getMaskingOp(); + rewriter.setInsertionPoint(maskOp); + rootOp = maskOp; + auto operandMasks = decomposeResultMask(loc, maskOp.getMask(), rewriter); + if (failed(operandMasks)) + return failure(); + std::tie(lhsMask, rhsMask) = *operandMasks; + } + + rewriter.replaceOpWithNewOp( + rootOp, outerProductOp.getResultVectorType(), outerProductOp.getLhs(), + outerProductOp.getRhs(), lhsMask, rhsMask, outerProductOp.getAcc()); + + return success(); + } + + static FailureOr> + decomposeResultMask(Location loc, Value mask, PatternRewriter &rewriter) { + // Attempt to extract masks from vector.create_mask. + // TODO: Add support for other mask sources. 
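+    // A 2-D `vector.create_mask %a, %b` is the outer product of the 1-D masks
+    // `vector.create_mask %a` and `vector.create_mask %b` (element (i, j) is
+    // set iff i < %a and j < %b), so the per-operand masks built below are
+    // exact.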
+ auto createMaskOp = mask.getDefiningOp(); + if (!createMaskOp) + return failure(); + + auto maskType = createMaskOp.getVectorType(); + Value lhsMaskDim = createMaskOp.getOperand(0); + Value rhsMaskDim = createMaskOp.getOperand(1); + + VectorType operandMaskType = VectorType::Builder(maskType).dropDim(0); + Value lhsMask = + rewriter.create(loc, operandMaskType, lhsMaskDim); + Value rhsMask = + rewriter.create(loc, operandMaskType, rhsMaskDim); + + return std::make_pair(lhsMask, rhsMask); + } +}; + } // namespace void mlir::populateVectorToArmSMEPatterns(RewritePatternSet &patterns, @@ -434,5 +533,6 @@ void mlir::populateVectorToArmSMEPatterns(RewritePatternSet &patterns, patterns.add(&ctx); + VectorLoadToArmSMELowering, VectorStoreToArmSMELowering, + VectorOuterProductToArmSMELowering>(&ctx); } diff --git a/mlir/lib/Dialect/ArmSME/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/ArmSME/Transforms/LegalizeForLLVMExport.cpp index 1231da356f8ed9..105f2de207a084 100644 --- a/mlir/lib/Dialect/ArmSME/Transforms/LegalizeForLLVMExport.cpp +++ b/mlir/lib/Dialect/ArmSME/Transforms/LegalizeForLLVMExport.cpp @@ -460,11 +460,11 @@ struct MoveTileSliceToVectorArmSMELowering } }; -/// Lower `vector.outerproduct` to SME MOPA intrinsics. +/// Lower `arm_sme.outerproduct` to SME MOPA intrinsics. /// /// Example: /// -/// %0 = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} +/// %0 = arm_sme.outerproduct %lhs, %rhs acc(%acc) /// : vector<[4]xf32>, vector<[4]xf32> /// /// is converted to: @@ -474,13 +474,13 @@ struct MoveTileSliceToVectorArmSMELowering /// vector<[4]xf32>) -> () /// /// Currently only supports FMOPA and BFMOPA (non-widening). -struct VectorOuterProductToArmSMELowering - : public ConvertOpToLLVMPattern { - using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; +struct OuterProductOpConversion + : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; LogicalResult - matchAndRewrite(vector::OuterProductOp outerProductOp, - vector::OuterProductOp::Adaptor adaptor, + matchAndRewrite(arm_sme::OuterProductOp outerProductOp, + arm_sme::OuterProductOp::Adaptor adaptor, ConversionPatternRewriter &rewriter) const override { auto isSupportedType = [](VectorType vectorType) { // TODO: the FP outer product instruction variants are predicated on @@ -512,24 +512,13 @@ struct VectorOuterProductToArmSMELowering return true; }; - auto resultVectorType = outerProductOp.getResultVectorType(); - if (!isSupportedType(resultVectorType)) - return outerProductOp.emitError("unsupported type"); - - vector::CombiningKind kind = outerProductOp.getKind(); - if (kind != vector::CombiningKind::ADD) - // TODO: support subtract. + // TODO: Support CombiningKind::Sub for outer products. + if (outerProductOp.getKind() != CombiningKind::Add) return outerProductOp.emitError("unsupported kind"); - auto maskableOp = - cast(outerProductOp.getOperation()); - if (maskableOp.isMasked()) - // TODO: support masking. - return outerProductOp.emitError("masking is currently unsupported"); - - if (!isa(outerProductOp.getOperandTypeRHS())) - // AXPY operation not suited for SME. - return failure(); + auto resultVectorType = outerProductOp.getResultType(); + if (!isSupportedType(resultVectorType)) + return outerProductOp.emitError("unsupported type"); auto loc = outerProductOp.getLoc(); @@ -542,21 +531,24 @@ struct VectorOuterProductToArmSMELowering auto tileId = rewriter.create( loc, rewriter.getIntegerType(elementWidth), acc); - // Create all active predicate mask. 
- auto one = rewriter.create( - loc, rewriter.getI1Type(), - rewriter.getIntegerAttr(rewriter.getI1Type(), 1)); - auto predTy = - VectorType::get(resultVectorType.getShape()[0], rewriter.getI1Type(), - /*scalableDims=*/{true}); - auto allActiveMask = rewriter.create(loc, predTy, one); - auto tileI32 = castTileIDToI32(tileId, loc, rewriter); + Value lhsMask = outerProductOp.getLhsMask(); + Value rhsMask = outerProductOp.getRhsMask(); + + if (!lhsMask || !rhsMask) { + auto predTy = + outerProductOp.getLhsType().cloneWith({}, rewriter.getI1Type()); + Value allActiveMask = rewriter.create( + loc, DenseElementsAttr::get(predTy, true)); + lhsMask = allActiveMask; + rhsMask = allActiveMask; + } + // Create 'arm_sme.intr.mopa' outer product intrinsic. - rewriter.create( - loc, tileI32, allActiveMask, allActiveMask, outerProductOp.getLhs(), - outerProductOp.getRhs()); + rewriter.create(loc, tileI32, lhsMask, rhsMask, + outerProductOp.getLhs(), + outerProductOp.getRhs()); // Create `CastTileToVectorOp` to use as the output. rewriter.replaceOpWithNewOp( @@ -733,6 +725,6 @@ void mlir::populateArmSMELegalizeForLLVMExportPatterns( patterns.add< LoadTileSliceToArmSMELowering, MoveTileSliceToVectorArmSMELowering, MoveVectorToTileSliceToArmSMELowering, StoreTileSliceToArmSMELowering, - VectorOuterProductToArmSMELowering, ZeroOpConversion, - VectorExtractToArmSMELowering, VectorInsertToArmSMELowering>(converter); + OuterProductOpConversion, ZeroOpConversion, VectorExtractToArmSMELowering, + VectorInsertToArmSMELowering>(converter); } diff --git a/mlir/test/Dialect/ArmSME/invalid.mlir b/mlir/test/Dialect/ArmSME/invalid.mlir index 25c62f78d84354..dba8b1937936e2 100644 --- a/mlir/test/Dialect/ArmSME/invalid.mlir +++ b/mlir/test/Dialect/ArmSME/invalid.mlir @@ -150,3 +150,25 @@ func.func @arm_sme_tile_load__pad_but_no_mask(%src : memref, %pad : f64 %tile = arm_sme.tile_load %src[%c0, %c0], %pad, : memref, vector<[2]x[2]xf64> return } + +//===----------------------------------------------------------------------===// +// arm_sme.outerproduct +//===----------------------------------------------------------------------===// + +// ----- + +func.func @arm_sme_outerproduct__bad_result_type(%vecA: vector<[2]xi16>, %vecB: vector<[2]xi16>) -> vector<[2]x[2]xi16> +{ + // expected-error@+1 {{op result #0 must be vector<[16]x[16]xi8> of 8-bit signless integer values or vector<[8]x[8]xi16> of 16-bit signless integer values or vector<[4]x[4]xi32> of 32-bit signless integer values or vector<[2]x[2]xi64> of 64-bit signless integer values or vector<[1]x[1]xi128> of 128-bit signless integer values or vector<[8]x[8]xf16> of 16-bit float values or vector<[8]x[8]xbf16> of bfloat16 type values or vector<[4]x[4]xf32> of 32-bit float values or vector<[2]x[2]xf64> of 64-bit float values, but got 'vector<[2]x[2]xi16>'}} + %0 = arm_sme.outerproduct %vecA, %vecB : vector<[2]xi16>, vector<[2]xi16> + return %0 : vector<[2]x[2]xi16> +} + +// ----- + +func.func @arm_sme_outerproduct__bad_vector_type(%vecA: vector<[4]xf32>, %vecB: vector<[8]xf32>) -> vector<[4]x[4]xf32> +{ + // expected-error@+1 {{op failed to verify that all of {lhs, rhs} have same type}} + %0 = arm_sme.outerproduct %vecA, %vecB : vector<[4]xf32>, vector<[8]xf32> + return %0 : vector<[4]x[4]xf32> +} diff --git a/mlir/test/Dialect/ArmSME/roundtrip.mlir b/mlir/test/Dialect/ArmSME/roundtrip.mlir index 6866137267dc66..90b05c54c58d93 100644 --- a/mlir/test/Dialect/ArmSME/roundtrip.mlir +++ b/mlir/test/Dialect/ArmSME/roundtrip.mlir @@ -1161,3 +1161,47 @@ func.func 
@arm_sme_move_tile_slice_to_vector_ver_f64(%tile : vector<[2]x[2]xf64> %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] layout : vector<[2]xf64> from vector<[2]x[2]xf64> return %slice : vector<[2]xf64> } + +//===----------------------------------------------------------------------===// +// arm_sme.outerproduct +//===----------------------------------------------------------------------===// + +// ----- + +func.func @arm_sme_outerproduct(%vecA: vector<[8]xi16>, %vecB: vector<[8]xi16>) -> vector<[8]x[8]xi16> { + // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} : vector<[8]xi16>, vector<[8]xi16> + %result = arm_sme.outerproduct %vecA, %vecB : vector<[8]xi16>, vector<[8]xi16> + return %result : vector<[8]x[8]xi16> +} + +// ----- + +func.func @arm_sme_outerproduct_with_masking(%vecA: vector<[4]xf32>, %vecB: vector<[4]xf32>, %maskA: vector<[4]xi1>, %maskB: vector<[4]xi1>) -> vector<[4]x[4]xf32> { + // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} masks({{.*}}, {{.*}}) : vector<[4]xf32>, vector<[4]xf32> + %result = arm_sme.outerproduct %vecA, %vecB masks(%maskA, %maskB) : vector<[4]xf32>, vector<[4]xf32> + return %result : vector<[4]x[4]xf32> +} + +// ----- + +func.func @arm_sme_outerproduct_with_acc(%vecA: vector<[2]xi64>, %vecB: vector<[2]xi64>, %acc: vector<[2]x[2]xi64>) -> vector<[2]x[2]xi64> { + // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} acc({{.*}}) : vector<[2]xi64>, vector<[2]xi64> + %result = arm_sme.outerproduct %vecA, %vecB acc(%acc) : vector<[2]xi64>, vector<[2]xi64> + return %result : vector<[2]x[2]xi64> +} + +// ----- + +func.func @arm_sme_outerproduct_with_kind(%vecA: vector<[2]xf64>, %vecB: vector<[2]xf64>) -> vector<[2]x[2]xf64> { + // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} kind : vector<[2]xf64>, vector<[2]xf64> + %result = arm_sme.outerproduct %vecA, %vecB kind : vector<[2]xf64>, vector<[2]xf64> + return %result : vector<[2]x[2]xf64> +} + +// ----- + +func.func @arm_sme_outerproduct_with_everything(%vecA: vector<[16]xi8>, %vecB: vector<[16]xi8>, %acc: vector<[16]x[16]xi8>, %maskA: vector<[16]xi1>, %maskB: vector<[16]xi1>) -> vector<[16]x[16]xi8> { + // CHECK: arm_sme.outerproduct {{.*}}, {{.*}} kind acc({{.*}}) masks({{.*}}, {{.*}}) : vector<[16]xi8>, vector<[16]xi8> + %result = arm_sme.outerproduct %vecA, %vecB kind acc(%acc) masks(%maskA, %maskB) : vector<[16]xi8>, vector<[16]xi8> + return %result : vector<[16]x[16]xi8> +} diff --git a/mlir/test/Dialect/ArmSME/vector-ops-to-llvm.mlir b/mlir/test/Dialect/ArmSME/vector-ops-to-llvm.mlir index 32f46d9fd817c9..721ff8f2c3589d 100644 --- a/mlir/test/Dialect/ArmSME/vector-ops-to-llvm.mlir +++ b/mlir/test/Dialect/ArmSME/vector-ops-to-llvm.mlir @@ -463,9 +463,68 @@ func.func @vector_outerproduct_no_accumulator(%lhs : vector<[2]xf64>, %rhs : vec // ----- +// CHECK-LABEL: @vector_outerproduct_masked_f32 +// CHECK-SAME: (%[[LHS:.*]]: vector<[4]xf32>, %[[RHS:.*]]: vector<[4]xf32>, %[[ACC:.*]]: vector<[4]x[4]xf32>, %[[DIM0:.*]]: index, %[[DIM1:.*]]: index +func.func @vector_outerproduct_masked_f32(%lhs : vector<[4]xf32>, %rhs : vector<[4]xf32>, %acc : vector<[4]x[4]xf32>, %dim0 : index, %dim1 : index) { + // CHECK: %[[DIM0_I32:.*]] = arith.index_cast %[[DIM0]] : index to i32 + // CHECK: %[[INSERT_DIM0:.*]] = llvm.insertelement %[[DIM0_I32]], {{.*}} : vector<[4]xi32> + // CHECK: %[[SPLAT_DIM0:.*]] = llvm.shufflevector %[[INSERT_DIM0]], {{.*}} : vector<[4]xi32> + // CHECK: %[[LHS_MASK:.*]] = arith.cmpi slt, %{{.*}}, %[[SPLAT_DIM0]] : vector<[4]xi32> + // CHECK: %[[DIM1_I32:.*]] = arith.index_cast %[[DIM1]] : index to i32 + 
// CHECK: %[[INSERT_DIM1:.*]] = llvm.insertelement %[[DIM1_I32]], {{.*}} : vector<[4]xi32> + // CHECK: %[[SPLAT_DIM1:.*]] = llvm.shufflevector %[[INSERT_DIM1]], {{.*}} : vector<[4]xi32> + // CHECK: %[[RHS_MASK:.*]] = arith.cmpi slt, %{{.*}}, %[[SPLAT_DIM1]] : vector<[4]xi32> + // CHECK: %[[CAST_VECTOR_TO_TILE:.*]] = arm_sme.cast_vector_to_tile %[[ACC]] : vector<[4]x[4]xf32> to i32 + // CHECK: "arm_sme.intr.mopa"(%[[CAST_VECTOR_TO_TILE]], %[[LHS_MASK]], %[[RHS_MASK]], %[[LHS]], %[[RHS]]) : (i32, vector<[4]xi1>, vector<[4]xi1>, vector<[4]xf32>, vector<[4]xf32>) + %mask = vector.create_mask %dim0, %dim1 : vector<[4]x[4]xi1> + %result = vector.mask %mask { vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[4]xf32>, vector<[4]xf32> } : vector<[4]x[4]xi1> -> vector<[4]x[4]xf32> + "prevent.dce"(%result) : (vector<[4]x[4]xf32>) -> () +} + +// ----- + +// CHECK-LABEL: @vector_outerproduct_masked_f16 +// CHECK-SAME: (%[[LHS:.*]]: vector<[8]xf16>, %[[RHS:.*]]: vector<[8]xf16>, %[[ACC:.*]]: vector<[8]x[8]xf16>, +func.func @vector_outerproduct_masked_f16(%lhs : vector<[8]xf16>, %rhs : vector<[8]xf16>, %acc : vector<[8]x[8]xf16>, %dim0 : index, %dim1 : index) { + // CHECK: arith.cmpi slt, {{.*}} : vector<[8]xi32> + // CHECK: arith.cmpi slt, {{.*}} : vector<[8]xi32> + // CHECK: "arm_sme.intr.mopa"({{.*}}, {{.*}}, {{.*}}) : (i32, vector<[8]xi1>, vector<[8]xi1>, vector<[8]xf16>, vector<[8]xf16>) + %mask = vector.create_mask %dim0, %dim1 : vector<[8]x[8]xi1> + %result = vector.mask %mask { vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[8]xf16>, vector<[8]xf16> } : vector<[8]x[8]xi1> -> vector<[8]x[8]xf16> + "prevent.dce"(%result) : (vector<[8]x[8]xf16>) -> () +} + +// ----- + +// CHECK-LABEL: @vector_outerproduct_masked_bf16 +// CHECK-SAME: (%[[LHS:.*]]: vector<[8]xbf16>, %[[RHS:.*]]: vector<[8]xbf16>, %[[ACC:.*]]: vector<[8]x[8]xbf16>, +func.func @vector_outerproduct_masked_bf16(%lhs : vector<[8]xbf16>, %rhs : vector<[8]xbf16>, %acc : vector<[8]x[8]xbf16>, %dim0 : index, %dim1 : index) { + // CHECK: arith.cmpi slt, {{.*}} : vector<[8]xi32> + // CHECK: arith.cmpi slt, {{.*}} : vector<[8]xi32> + // CHECK: "arm_sme.intr.mopa"({{.*}}, {{.*}}, {{.*}}) : (i32, vector<[8]xi1>, vector<[8]xi1>, vector<[8]xbf16>, vector<[8]xbf16>) + %mask = vector.create_mask %dim0, %dim1 : vector<[8]x[8]xi1> + %result = vector.mask %mask { vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[8]xbf16>, vector<[8]xbf16> } : vector<[8]x[8]xi1> -> vector<[8]x[8]xbf16> + "prevent.dce"(%result) : (vector<[8]x[8]xbf16>) -> () +} + +// ----- + +// CHECK-LABEL: @vector_outerproduct_masked_f64 +// CHECK-SAME: (%[[LHS:.*]]: vector<[2]xf64>, %[[RHS:.*]]: vector<[2]xf64>, %[[ACC:.*]]: vector<[2]x[2]xf64>, +func.func @vector_outerproduct_masked_f64(%lhs : vector<[2]xf64>, %rhs : vector<[2]xf64>, %acc : vector<[2]x[2]xf64>, %dim0 : index, %dim1 : index) { + // CHECK: arith.cmpi slt, {{.*}} : vector<[2]xi32> + // CHECK: arith.cmpi slt, {{.*}} : vector<[2]xi32> + // CHECK: "arm_sme.intr.mopa"({{.*}}, {{.*}}, {{.*}}) : (i32, vector<[2]xi1>, vector<[2]xi1>, vector<[2]xf64>, vector<[2]xf64>) + %mask = vector.create_mask %dim0, %dim1 : vector<[2]x[2]xi1> + %result = vector.mask %mask { vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[2]xf64>, vector<[2]xf64> } : vector<[2]x[2]xi1> -> vector<[2]x[2]xf64> + "prevent.dce"(%result) : (vector<[2]x[2]xf64>) -> () +} + +// ----- + // CHECK-LABEL: @vector_outerproduct_unsupported_axpy func.func 
@vector_outerproduct_unsupported_axpy(%lhs : vector<[2]xf64>, %rhs : f64, %acc : vector<[2]xf64>) -> vector<[2]xf64> { - // CHECK-NOT: arm_sme + // expected-error@+1 {{AXPY operations not supported}} %0 = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[2]xf64>, f64 return %0 : vector<[2]xf64> } @@ -473,7 +532,6 @@ func.func @vector_outerproduct_unsupported_axpy(%lhs : vector<[2]xf64>, %rhs : f // ----- func.func @vector_outerproduct_unsupported_type(%lhs : vector<[16]xi8>, %rhs : vector<[16]xi8>, %acc : vector<[16]x[16]xi8>) { - // expected-error@+2 {{failed to legalize operation 'vector.outerproduct'}} // expected-error@+1 {{unsupported type}} %0 = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[16]xi8>, vector<[16]xi8> "prevent.dce"(%0) : (vector<[16]x[16]xi8>) -> () @@ -490,9 +548,8 @@ func.func @vector_outerproduct_unsupported_kind(%lhs : vector<[2]xf64>, %rhs : v // ----- -func.func @vector_outerproduct_add_masked_f32(%lhs : vector<[4]xf32>, %rhs : vector<[4]xf32>, %acc : vector<[4]x[4]xf32>, %mask : vector<[4]x[4]xi1>) { - // expected-error@+2 {{failed to legalize operation 'vector.outerproduct'}} - // expected-error@+1 {{masking is currently unsupported}} +func.func @vector_outerproduct_unknown_mask(%lhs : vector<[4]xf32>, %rhs : vector<[4]xf32>, %acc : vector<[4]x[4]xf32>, %mask : vector<[4]x[4]xi1>) { + // expected-error@+1 {{failed to legalize operation 'vector.outerproduct'}} %0 = vector.mask %mask { vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[4]xf32>, vector<[4]xf32> } : vector<[4]x[4]xi1> -> vector<[4]x[4]xf32> "prevent.dce"(%0) : (vector<[4]x[4]xf32>) -> () } diff --git a/mlir/test/Dialect/ArmSME/vector-ops-to-sme.mlir b/mlir/test/Dialect/ArmSME/vector-ops-to-sme.mlir index 455b47a83e28f4..9eb7cd143e5b5e 100644 --- a/mlir/test/Dialect/ArmSME/vector-ops-to-sme.mlir +++ b/mlir/test/Dialect/ArmSME/vector-ops-to-sme.mlir @@ -578,3 +578,99 @@ func.func @transpose_f64(%arg0: vector<[2]x[2]xf64>) { "prevent.dce"(%0) : (vector<[2]x[2]xf64>) -> () return } + +//===----------------------------------------------------------------------===// +// vector.outerproduct +//===----------------------------------------------------------------------===// + +// ----- + +// CHECK-LABEL: @vector_outerproduct_masked_f16 +// CHECK-SAME: (%[[LHS:.*]]: vector<[8]xf16>, %[[RHS:.*]]: vector<[8]xf16>, %[[ACC:.*]]: vector<[8]x[8]xf16>, %[[DIM0:.*]]: index, %[[DIM1:.*]]: index +func.func @vector_outerproduct_masked_f16(%lhs : vector<[8]xf16>, %rhs : vector<[8]xf16>, %acc : vector<[8]x[8]xf16>, %dim0 : index, %dim1 : index) { + %mask = vector.create_mask %dim0, %dim1 : vector<[8]x[8]xi1> + // CHECK: %[[LHS_MASK:.*]] = vector.create_mask %[[DIM0]] : vector<[8]xi1> + // CHECK: %[[RHS_MASK:.*]] = vector.create_mask %[[DIM1]] : vector<[8]xi1> + // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) masks(%[[LHS_MASK]], %[[RHS_MASK]]) : vector<[8]xf16>, vector<[8]xf16> + %result = vector.mask %mask { vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[8]xf16>, vector<[8]xf16> } : vector<[8]x[8]xi1> -> vector<[8]x[8]xf16> + "prevent.dce"(%result) : (vector<[8]x[8]xf16>) -> () +} + +// ----- + +// CHECK-LABEL: @vector_outerproduct_masked_bf16 +// CHECK-SAME: (%[[LHS:.*]]: vector<[8]xbf16>, %[[RHS:.*]]: vector<[8]xbf16>, %[[ACC:.*]]: vector<[8]x[8]xbf16>, %[[DIM0:.*]]: index, %[[DIM1:.*]]: index +func.func @vector_outerproduct_masked_bf16(%lhs : vector<[8]xbf16>, %rhs : vector<[8]xbf16>, %acc : vector<[8]x[8]xbf16>, %dim0 : 
index, %dim1 : index) { + %mask = vector.create_mask %dim0, %dim1 : vector<[8]x[8]xi1> + // CHECK: %[[LHS_MASK:.*]] = vector.create_mask %[[DIM0]] : vector<[8]xi1> + // CHECK: %[[RHS_MASK:.*]] = vector.create_mask %[[DIM1]] : vector<[8]xi1> + // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) masks(%[[LHS_MASK]], %[[RHS_MASK]]) : vector<[8]xbf16>, vector<[8]xbf16> + %result = vector.mask %mask { vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[8]xbf16>, vector<[8]xbf16> } : vector<[8]x[8]xi1> -> vector<[8]x[8]xbf16> + "prevent.dce"(%result) : (vector<[8]x[8]xbf16>) -> () +} + +// ----- + +// CHECK-LABEL: @vector_outerproduct_masked_f32 +// CHECK-SAME: (%[[LHS:.*]]: vector<[4]xf32>, %[[RHS:.*]]: vector<[4]xf32>, %[[ACC:.*]]: vector<[4]x[4]xf32>, %[[DIM0:.*]]: index, %[[DIM1:.*]]: index +func.func @vector_outerproduct_masked_f32(%lhs : vector<[4]xf32>, %rhs : vector<[4]xf32>, %acc : vector<[4]x[4]xf32>, %dim0 : index, %dim1 : index) { + %mask = vector.create_mask %dim0, %dim1 : vector<[4]x[4]xi1> + // CHECK: %[[LHS_MASK:.*]] = vector.create_mask %[[DIM0]] : vector<[4]xi1> + // CHECK: %[[RHS_MASK:.*]] = vector.create_mask %[[DIM1]] : vector<[4]xi1> + // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) masks(%[[LHS_MASK]], %[[RHS_MASK]]) : vector<[4]xf32>, vector<[4]xf32> + %result = vector.mask %mask { vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[4]xf32>, vector<[4]xf32> } : vector<[4]x[4]xi1> -> vector<[4]x[4]xf32> + "prevent.dce"(%result) : (vector<[4]x[4]xf32>) -> () +} + +// ----- + +// CHECK-LABEL: @vector_outerproduct_masked_f64 +// CHECK-SAME: (%[[LHS:.*]]: vector<[2]xf64>, %[[RHS:.*]]: vector<[2]xf64>, %[[ACC:.*]]: vector<[2]x[2]xf64>, %[[DIM0:.*]]: index, %[[DIM1:.*]]: index +func.func @vector_outerproduct_masked_f64(%lhs : vector<[2]xf64>, %rhs : vector<[2]xf64>, %acc : vector<[2]x[2]xf64>, %dim0 : index, %dim1 : index) { + %mask = vector.create_mask %dim0, %dim1 : vector<[2]x[2]xi1> + // CHECK: %[[LHS_MASK:.*]] = vector.create_mask %[[DIM0]] : vector<[2]xi1> + // CHECK: %[[RHS_MASK:.*]] = vector.create_mask %[[DIM1]] : vector<[2]xi1> + // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) masks(%[[LHS_MASK]], %[[RHS_MASK]]) : vector<[2]xf64>, vector<[2]xf64> + %result = vector.mask %mask { vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[2]xf64>, vector<[2]xf64> } : vector<[2]x[2]xi1> -> vector<[2]x[2]xf64> + "prevent.dce"(%result) : (vector<[2]x[2]xf64>) -> () +} + +// ----- + +// CHECK-LABEL: @vector_outerproduct_f16 +// CHECK-SAME: (%[[LHS:.*]]: vector<[8]xf16>, %[[RHS:.*]]: vector<[8]xf16>, %[[ACC:.*]]: vector<[8]x[8]xf16> +func.func @vector_outerproduct_f16(%lhs : vector<[8]xf16>, %rhs : vector<[8]xf16>, %acc : vector<[8]x[8]xf16>) { + // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) : vector<[8]xf16>, vector<[8]xf16> + %result = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[8]xf16>, vector<[8]xf16> + "prevent.dce"(%result) : (vector<[8]x[8]xf16>) -> () +} + +// ----- + +// CHECK-LABEL: @vector_outerproduct_bf16 +// CHECK-SAME: (%[[LHS:.*]]: vector<[8]xbf16>, %[[RHS:.*]]: vector<[8]xbf16>, %[[ACC:.*]]: vector<[8]x[8]xbf16> +func.func @vector_outerproduct_bf16(%lhs : vector<[8]xbf16>, %rhs : vector<[8]xbf16>, %acc : vector<[8]x[8]xbf16>) { + // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) : vector<[8]xbf16>, vector<[8]xbf16> + %result = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[8]xbf16>, 
vector<[8]xbf16> + "prevent.dce"(%result) : (vector<[8]x[8]xbf16>) -> () +} + +// ----- + +// CHECK-LABEL: @vector_outerproduct_f32 +// CHECK-SAME: (%[[LHS:.*]]: vector<[4]xf32>, %[[RHS:.*]]: vector<[4]xf32>, %[[ACC:.*]]: vector<[4]x[4]xf32> +func.func @vector_outerproduct_f32(%lhs : vector<[4]xf32>, %rhs : vector<[4]xf32>, %acc : vector<[4]x[4]xf32>) { + // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) : vector<[4]xf32>, vector<[4]xf32> + %result = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[4]xf32>, vector<[4]xf32> + "prevent.dce"(%result) : (vector<[4]x[4]xf32>) -> () +} + +// ----- + +// CHECK-LABEL: @vector_outerproduct_f64 +// CHECK-SAME: (%[[LHS:.*]]: vector<[2]xf64>, %[[RHS:.*]]: vector<[2]xf64>, %[[ACC:.*]]: vector<[2]x[2]xf64> +func.func @vector_outerproduct_f64(%lhs : vector<[2]xf64>, %rhs : vector<[2]xf64>, %acc : vector<[2]x[2]xf64>) { + // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) : vector<[2]xf64>, vector<[2]xf64> + %result = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind} : vector<[2]xf64>, vector<[2]xf64> + "prevent.dce"(%result) : (vector<[2]x[2]xf64>) -> () +} diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir index 38ba489e2fafb2..ae5ad9cc2a5e90 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir @@ -3,16 +3,24 @@ // DEFINE: -enable-arm-streaming="mode=locally enable-za" \ // DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \ // DEFINE: -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \ -// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm -// DEFINE: %{run} = %mcr_aarch64_cmd \ +// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm -o %t +// DEFINE: %{run} = %mcr_aarch64_cmd %t \ // DEFINE: -march=aarch64 -mattr=+sve,+sme \ // DEFINE: -e %{entry_point} -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils -// RUN: %{compile} | %{run} | FileCheck %s --check-prefix=WITHOUT-ACC +// RUN: %{compile} + +// RUN: %{run} | FileCheck %s --check-prefix=WITHOUT-ACC // REDEFINE: %{entry_point} = test_outerproduct_with_accumulator_4x4xf32 -// RUN: %{compile} | %{run} | FileCheck %s --check-prefix=WITH-ACC +// RUN: %{run} | FileCheck %s --check-prefix=WITH-ACC + +// REDEFINE: %{entry_point} = test_masked_outerproduct_no_accumulator_4x4xf32 +// RUN: %{run} | FileCheck %s --check-prefix=WITH-MASK + +// REDEFINE: %{entry_point} = test_masked_outerproduct_with_accumulator_4x4xf32 +// RUN: %{run} | FileCheck %s --check-prefix=WITH-MASK-AND-ACC func.func @test_outerproduct_no_accumulator_4x4xf32() { %c0 = arith.constant 0 : index @@ -41,7 +49,7 @@ func.func @test_outerproduct_with_accumulator_4x4xf32() { %c0 = arith.constant 0 : index %f10 = arith.constant 10.0 : f32 - %acc = vector.broadcast %f10 : f32 to vector<[4]x[4]xf32> + %acc = vector.splat %f10 : vector<[4]x[4]xf32> %vector_i32 = llvm.intr.experimental.stepvector : vector<[4]xi32> %vector = arith.sitofp %vector_i32 : vector<[4]xi32> to vector<[4]xf32> %tile = vector.outerproduct %vector, %vector, %acc : vector<[4]xf32>, vector<[4]xf32> @@ -61,3 +69,67 @@ func.func @test_outerproduct_with_accumulator_4x4xf32() { return } + +func.func @test_masked_outerproduct_no_accumulator_4x4xf32() { + %c0 = arith.constant 0 : index + %ones = arith.constant dense<1> : vector<[4]xi32> + + 
%step_vector = llvm.intr.experimental.stepvector : vector<[4]xi32> + %vector_i32 = arith.addi %step_vector, %ones : vector<[4]xi32> + %vector = arith.sitofp %vector_i32 : vector<[4]xi32> to vector<[4]xf32> + + %lhsDim = arith.constant 3 : index + %rhsDim = arith.constant 2 : index + %mask = vector.create_mask %lhsDim, %rhsDim : vector<[4]x[4]xi1> + + %tile = vector.mask %mask { + vector.outerproduct %vector, %vector : vector<[4]xf32>, vector<[4]xf32> + } : vector<[4]x[4]xi1> -> vector<[4]x[4]xf32> + + // Print the tile. Due to masking the result will be the top 3x2xf32 section. + // + // WITH-MASK: TILE BEGIN + // WITH-MASK-NEXT: ( 1, 2, 0, 0 + // WITH-MASK-NEXT: ( 2, 4, 0, 0 + // WITH-MASK-NEXT: ( 3, 6, 0, 0 + // WITH-MASK-NEXT: ( 0, 0, 0, 0 + // WITH-MASK: TILE END + vector.print str "TILE BEGIN" + vector.print %tile : vector<[4]x[4]xf32> + vector.print str "TILE END" + + return +} + +func.func @test_masked_outerproduct_with_accumulator_4x4xf32() { + %c0 = arith.constant 0 : index + %ones = arith.constant dense<1> : vector<[4]xi32> + %f10 = arith.constant 10.0 : f32 + + %acc = vector.splat %f10 : vector<[4]x[4]xf32> + %step_vector = llvm.intr.experimental.stepvector : vector<[4]xi32> + %vector_i32 = arith.addi %step_vector, %ones : vector<[4]xi32> + %vector = arith.sitofp %vector_i32 : vector<[4]xi32> to vector<[4]xf32> + + %lhsDim = arith.constant 2 : index + %rhsDim = arith.constant 3 : index + %mask = vector.create_mask %lhsDim, %rhsDim : vector<[4]x[4]xi1> + + %tile = vector.mask %mask { + vector.outerproduct %vector, %vector, %acc : vector<[4]xf32>, vector<[4]xf32> + } : vector<[4]x[4]xi1> -> vector<[4]x[4]xf32> + + // Print the tile. Due to masking the result will be the top 2x3xf32 section. + // + // WITH-MASK-AND-ACC: TILE BEGIN + // WITH-MASK-AND-ACC-NEXT: ( 11, 12, 13, 10 + // WITH-MASK-AND-ACC-NEXT: ( 12, 14, 16, 10 + // WITH-MASK-AND-ACC-NEXT: ( 10, 10, 10, 10 + // WITH-MASK-AND-ACC-NEXT: ( 10, 10, 10, 10 + // WITH-MASK-AND-ACC: TILE END + vector.print str "TILE BEGIN" + vector.print %tile : vector<[4]x[4]xf32> + vector.print str "TILE END" + + return +} diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir index 82f14595a24da2..36ce896a4c1bd9 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir @@ -1,34 +1,47 @@ -// DEFINE: %{entry_point} = test_outerproduct_with_accumulator_2x2xf64 +// DEFINE: %{entry_point} = test_outerproduct_no_accumulator_2x2xf64 // DEFINE: %{compile} = mlir-opt %s \ // DEFINE: -enable-arm-streaming="mode=locally enable-za" \ // DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \ // DEFINE: -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \ -// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm -// DEFINE: %{run} = %mcr_aarch64_cmd \ +// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm -o %t +// DEFINE: %{run} = %mcr_aarch64_cmd %t \ // DEFINE: -march=aarch64 -mattr=+sve,+sme-f64f64 \ // DEFINE: -e %{entry_point} -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils -// RUN: %{compile} | %{run} | FileCheck %s +// RUN: %{compile} -func.func @test_outerproduct_with_accumulator_2x2xf64() { - %f1 = arith.constant 1.0 : f64 - %f2 = arith.constant 2.0 : f64 - %f10 = arith.constant 10.0 : f64 +// RUN: %{run} | FileCheck %s + +// REDEFINE: %{entry_point} = 
test_outerproduct_with_accumulator_2x2xf64 +// RUN: %{run} | FileCheck %s --check-prefix=WITH-ACC + +// REDEFINE: %{entry_point} = test_masked_outerproduct_no_accumulator_2x2xf64 +// RUN: %{run} | FileCheck %s --check-prefix=WITH-MASK + +// REDEFINE: %{entry_point} = test_masked_outerproduct_with_accumulator_2x2xf64 +// RUN: %{run} | FileCheck %s --check-prefix=WITH-MASK-AND-ACC + +func.func @test_outerproduct_no_accumulator_2x2xf64() { + %c0 = arith.constant 0 : index + %ones = arith.constant dense<1> : vector<[2]xi32> - %a = vector.splat %f1 : vector<[2]xf64> - %b = vector.splat %f2 : vector<[2]xf64> - // TODO: vector.splat doesn't support ArmSME. - %c = vector.broadcast %f10 : f64 to vector<[2]x[2]xf64> + %step_vector = llvm.intr.experimental.stepvector : vector<[2]xi32> + %vector_i32 = arith.addi %step_vector, %ones : vector<[2]xi32> + %vector = arith.sitofp %vector_i32 : vector<[2]xi32> to vector<[2]xf64> - %tile = vector.outerproduct %a, %b, %c : vector<[2]xf64>, vector<[2]xf64> + %lhsDim = arith.constant 1 : index + %rhsDim = arith.constant 2 : index + %mask = vector.create_mask %lhsDim, %rhsDim : vector<[2]x[2]xi1> + + %tile = vector.outerproduct %vector, %vector : vector<[2]xf64>, vector<[2]xf64> // Print the tile. The smallest SVL is 128-bits so the tile will be at least // 2x2xf64. // // CHECK: TILE BEGIN - // CHECK-NEXT: ( 12, 12 - // CHECK-NEXT: ( 12, 12 + // CHECK-NEXT: ( 1, 2 + // CHECK-NEXT: ( 2, 4 // CHECK: TILE END vector.print str "TILE BEGIN" vector.print %tile : vector<[2]x[2]xf64> @@ -36,3 +49,90 @@ func.func @test_outerproduct_with_accumulator_2x2xf64() { return } + +func.func @test_outerproduct_with_accumulator_2x2xf64() { + %c0 = arith.constant 0 : index + %ones = arith.constant dense<1> : vector<[2]xi32> + %f10 = arith.constant 10.0 : f64 + + %acc = vector.splat %f10 : vector<[2]x[2]xf64> + %step_vector = llvm.intr.experimental.stepvector : vector<[2]xi32> + %vector_i32 = arith.addi %step_vector, %ones : vector<[2]xi32> + %vector = arith.sitofp %vector_i32 : vector<[2]xi32> to vector<[2]xf64> + + %tile = vector.outerproduct %vector, %vector, %acc : vector<[2]xf64>, vector<[2]xf64> + + // Print the tile. The smallest SVL is 128-bits so the tile will be at least + // 2x2xf64. + // + // WITH-ACC: TILE BEGIN + // WITH-ACC-NEXT: ( 11, 12 + // WITH-ACC-NEXT: ( 12, 14 + // WITH-ACC: TILE END + vector.print str "TILE BEGIN" + vector.print %tile : vector<[2]x[2]xf64> + vector.print str "TILE END" + + return +} + +func.func @test_masked_outerproduct_no_accumulator_2x2xf64() { + %c0 = arith.constant 0 : index + %ones = arith.constant dense<1> : vector<[2]xi32> + %f10 = arith.constant 10.0 : f64 + + %step_vector = llvm.intr.experimental.stepvector : vector<[2]xi32> + %vector_i32 = arith.addi %step_vector, %ones : vector<[2]xi32> + %vector = arith.sitofp %vector_i32 : vector<[2]xi32> to vector<[2]xf64> + + %lhsDim = arith.constant 2 : index + %rhsDim = arith.constant 1 : index + %mask = vector.create_mask %lhsDim, %rhsDim : vector<[2]x[2]xi1> + + %tile = vector.mask %mask { + vector.outerproduct %vector, %vector : vector<[2]xf64>, vector<[2]xf64> + } : vector<[2]x[2]xi1> -> vector<[2]x[2]xf64> + + // Print the tile. Due to masking the result will be the top 2x1xf64 section. 
+ // + // WITH-MASK: TILE BEGIN + // WITH-MASK-NEXT: ( 1, 0 + // WITH-MASK-NEXT: ( 2, 0 + // WITH-MASK: TILE END + vector.print str "TILE BEGIN" + vector.print %tile : vector<[2]x[2]xf64> + vector.print str "TILE END" + + return +} + +func.func @test_masked_outerproduct_with_accumulator_2x2xf64() { + %c0 = arith.constant 0 : index + %ones = arith.constant dense<1> : vector<[2]xi32> + %f10 = arith.constant 10.0 : f64 + + %acc = vector.splat %f10 : vector<[2]x[2]xf64> + %step_vector = llvm.intr.experimental.stepvector : vector<[2]xi32> + %vector_i32 = arith.addi %step_vector, %ones : vector<[2]xi32> + %vector = arith.sitofp %vector_i32 : vector<[2]xi32> to vector<[2]xf64> + + %lhsDim = arith.constant 1 : index + %rhsDim = arith.constant 2 : index + %mask = vector.create_mask %lhsDim, %rhsDim : vector<[2]x[2]xi1> + + %tile = vector.mask %mask { + vector.outerproduct %vector, %vector, %acc : vector<[2]xf64>, vector<[2]xf64> + } : vector<[2]x[2]xi1> -> vector<[2]x[2]xf64> + + // Print the tile. Due to masking the result will be the top 1x2xf64 section. + // + // WITH-MASK-AND-ACC: TILE BEGIN + // WITH-MASK-AND-ACC-NEXT: ( 11, 12 + // WITH-MASK-AND-ACC-NEXT: ( 10, 10 + // WITH-MASK-AND-ACC: TILE END + vector.print str "TILE BEGIN" + vector.print %tile : vector<[2]x[2]xf64> + vector.print str "TILE END" + + return +} From 6cbcb793506de2cf76dae19269aa4b47e1b55d43 Mon Sep 17 00:00:00 2001 From: lorenzo chelini Date: Tue, 31 Oct 2023 10:07:35 +0100 Subject: [PATCH 137/144] [MLIR][Linalg] Introduce SpecializeOp (#70326) Introduce an operation to specialize linalg.generics, for example, detecting a linalg.generic that is semantically equivalent to a linalg.copy and replacing the former with the latter. After code generation, it is helpful to lower named operations to vendor-optimized libraries. --- .../mlir/Dialect/Linalg/IR/LinalgInterfaces.h | 3 + .../Linalg/TransformOps/LinalgTransformOps.td | 37 +++++ .../Dialect/Linalg/Transforms/Transforms.h | 5 + .../Dialect/Linalg/IR/LinalgInterfaces.cpp | 22 +++ .../TransformOps/LinalgTransformOps.cpp | 24 +++ .../Dialect/Linalg/Transforms/CMakeLists.txt | 1 + .../Dialect/Linalg/Transforms/Specialize.cpp | 32 ++++ .../Linalg/transform-op-specialize.mlir | 143 ++++++++++++++++++ 8 files changed, 267 insertions(+) create mode 100644 mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp create mode 100644 mlir/test/Dialect/Linalg/transform-op-specialize.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h index f6ba6586a81a24..6c8240267e7d05 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h @@ -110,6 +110,9 @@ FailureOr inferConvolutionDims(LinalgOp linalgOp); // TODO: embed within `isa` if possible / natural. bool isaConvolutionOpInterface(LinalgOp linalgOp); +/// Checks whether `linalgOp` is semantically equivalent to a `linalg.copyOp`. 
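+/// That is: all loops are parallel, there is exactly one input and one init
+/// operand, both indexing maps are identities, and the body is a single
+/// `linalg.yield`. For example (with `#id` an identity affine map):
+///
+///   linalg.generic {indexing_maps = [#id, #id],
+///                   iterator_types = ["parallel", "parallel"]}
+///       ins(%in : tensor<?x?xf32>) outs(%out : tensor<?x?xf32>) {
+///   ^bb0(%a: f32, %b: f32):
+///     linalg.yield %a : f32
+///   } -> tensor<?x?xf32>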
+bool isaCopyOpInterface(LinalgOp linalgOp); + namespace detail { /// Returns true if the block contains a contraction of the following form: diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 1ff88d036bc036..9e3f79e64bb1d7 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -390,6 +390,43 @@ def GeneralizeOp : Op { + let description = [{ + Transforms a generic operation into the equivalent named form. + + #### Return modes + + This operation ignores non-Linalg ops and drops them in the return. If all + the operations referred to by the `target` handle specialize, the transform + succeeds; otherwise, the operation produces a silenceable failure. The return + handle points to only the subset of successfully produced equivalent named + operations, which can be empty or contain the original ops if they were already + in named form. The supported specialization to named Linalg operations are: + - linalg.copy of any rank. + }]; + + let arguments = (ins TransformHandleTypeInterface:$target); + let results = (outs TransformHandleTypeInterface:$transformed); + let assemblyFormat = + "$target attr-dict `:` " + "custom(type($target), type($transformed))"; + + let extraClassDeclaration = [{ + ::mlir::DiagnosedSilenceableFailure applyToOne( + ::mlir::transform::TransformRewriter &rewriter, + ::mlir::linalg::LinalgOp target, + ::mlir::transform::ApplyToEachResultList &results, + ::mlir::transform::TransformState &state); + }]; +} + //===----------------------------------------------------------------------===// // InterchangeOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index fbe2923c710aab..122f7356285210 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -668,6 +668,11 @@ FailureOr interchangeGenericOp(RewriterBase &rewriter, FailureOr generalizeNamedOp(RewriterBase &rewriter, LinalgOp namedOp); +/// Create a namedOp from the given GenericOp and replace the GenericOp. +/// Currently we can specialize only trivial linalg copy operations. +FailureOr specializeGenericOp(RewriterBase &rewriter, + GenericOp genericOp); + /// Create a new buffer using the `allocationFn` provided. The size of this /// buffer is the smallest constant bounding size along each dimension that /// can be computed for the size of the result of `subView`. 
Returns the diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp index 5fde8d71cac3e7..dfd6b991e7da15 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp @@ -32,6 +32,7 @@ using namespace mlir::linalg; //===----------------------------------------------------------------------===// // Interface utility functions //===----------------------------------------------------------------------===// + bool linalg::detail::canOpOperandsBeDroppedImpl( linalg::LinalgOp linalgOp, ArrayRef droppedOperands) { SmallVector indexingMaps; @@ -48,6 +49,27 @@ bool linalg::detail::canOpOperandsBeDroppedImpl( return inversePermutation(concatAffineMaps(indexingMaps)) != AffineMap(); } +//===----------------------------------------------------------------------===// +// CopyOpInterface implementation +//===----------------------------------------------------------------------===// + +bool linalg::isaCopyOpInterface(LinalgOp linalgOp) { + // Structural. + if (linalgOp.getNumParallelLoops() != linalgOp.getNumLoops()) + return false; + + // Operands and maps. + if (linalgOp.getNumDpsInputs() != 1 || linalgOp.getNumDpsInits() != 1) + return false; + auto mapRange = linalgOp.getIndexingMapsArray(); + if (mapRange.size() != 2 || !mapRange.front().isIdentity() || + !mapRange.back().isIdentity()) { + return false; + } + // Region. + return llvm::hasSingleElement(linalgOp.getBlock()->getOperations()); +} + //===----------------------------------------------------------------------===// // ContractionOpInterface implementation //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 8508507871d0c6..87be3bb85b6e78 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -1018,6 +1018,30 @@ transform::GeneralizeOp::applyToOne(transform::TransformRewriter &rewriter, return emitDefaultSilenceableFailure(target); } +//===----------------------------------------------------------------------===// +// SpecializeOp +//===----------------------------------------------------------------------===/ + +DiagnosedSilenceableFailure +transform::SpecializeOp::applyToOne(transform::TransformRewriter &rewriter, + LinalgOp target, + transform::ApplyToEachResultList &results, + transform::TransformState &state) { + // Exit early if the operation is not a generic. 
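+  // (Matching the documented return mode: ops that are already in named form
+  // are forwarded unchanged instead of producing a failure.)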
+  if (!isa<GenericOp>(target)) {
+    results.push_back(target);
+    return DiagnosedSilenceableFailure::success();
+  }
+  rewriter.setInsertionPoint(target);
+  FailureOr<LinalgOp> named =
+      specializeGenericOp(rewriter, cast<GenericOp>(target));
+  if (succeeded(named)) {
+    results.push_back(named->getOperation());
+    return DiagnosedSilenceableFailure::success();
+  }
+  return emitDefaultSilenceableFailure(target);
+}
+
 //===----------------------------------------------------------------------===//
 // InterchangeOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
index bad246c262979b..e0a43a29c32d88 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -24,6 +24,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms
   NamedOpConversions.cpp
   Padding.cpp
   Promotion.cpp
+  Specialize.cpp
   Split.cpp
   SplitReduction.cpp
   SubsetHoisting.cpp
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp
new file mode 100644
index 00000000000000..4c437b5db2c7b0
--- /dev/null
+++ b/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp
@@ -0,0 +1,32 @@
+//===- Specialize.cpp - linalg generic ops to named ops ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a method to specialize generic operations to named
+// operations. Conceptually it is the opposite of generalize.cpp.
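+// (Currently only generics that are trivial copies are recognized, via
+// isaCopyOpInterface.)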
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "linalg-specialization" + +using namespace mlir; +using namespace mlir::linalg; + +FailureOr mlir::linalg::specializeGenericOp(RewriterBase &rewriter, + GenericOp genericOp) { + if (isaCopyOpInterface(genericOp)) { + LinalgOp namedOp = rewriter.replaceOpWithNewOp( + genericOp, genericOp.getDpsInputs()[0], genericOp.getDpsInits()[0]); + return namedOp; + } + return failure(); +} diff --git a/mlir/test/Dialect/Linalg/transform-op-specialize.mlir b/mlir/test/Dialect/Linalg/transform-op-specialize.mlir new file mode 100644 index 00000000000000..8a22c115f31170 --- /dev/null +++ b/mlir/test/Dialect/Linalg/transform-op-specialize.mlir @@ -0,0 +1,143 @@ +// RUN: mlir-opt --transform-interpreter --split-input-file --verify-diagnostics %s | FileCheck %s + +#map = affine_map<(d0, d1) -> (d0, d1)> +#map1 = affine_map<(d0, d1) -> (d0)> +#map2 = affine_map<(d0, d1) -> (d1, d0)> + +func.func @broadcast_copy_expect_no_match(%arg0: memref, %arg1: memref) { + // expected-note @below {{when applied to this op}} + linalg.generic { + indexing_maps = [#map1, #map], + iterator_types = ["parallel", "parallel"]} + ins(%arg0 : memref) outs(%arg1 : memref) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } + return +} + +func.func @not_a_copy_expect_no_match(%arg0: memref, %arg1: memref) { + // expected-note @below {{when applied to this op}} + linalg.generic { + indexing_maps = [#map, #map], + iterator_types = ["parallel", "parallel"]} + ins(%arg0 : memref) outs(%arg1 : memref) { + ^bb0(%in: f32, %out: f32): + %0 = arith.addf %in, %out : f32 + linalg.yield %0 : f32 + } + return +} + +func.func @transpose_op_expect_no_match(%arg0: memref, %arg1: memref) { + // expected-note @below {{when applied to this op}} + linalg.generic { + indexing_maps = [#map, #map2], + iterator_types = ["parallel", "parallel"]} + ins(%arg0 : memref) outs(%arg1 : memref) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } + return +} + +func.func @copy_with_up_cast(%arg0: memref, %arg1: memref) { + // expected-note @below {{when applied to this op}} + linalg.generic { + indexing_maps = [#map, #map], + iterator_types = ["parallel", "parallel"]} + ins(%arg0 : memref) outs(%arg1 : memref) { + ^bb0(%in: f16, %out: f32): + %0 = arith.extf %in : f16 to f32 + linalg.yield %0 : f32 + } + return +} + +func.func @copy_with_down_cast(%arg0: memref, %arg1: memref) { + // expected-note @below {{when applied to this op}} + linalg.generic { + indexing_maps = [#map, #map], + iterator_types = ["parallel", "parallel"]} + ins(%arg0 : memref) outs(%arg1 : memref) { + ^bb0(%in: f32, %out: f16): + %0 = arith.truncf %in : f32 to f16 + linalg.yield %0 : f16 + } + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match interface{LinalgOp} in %arg1 : (!transform.any_op) -> !transform.any_op + // expected-error @below {{failed to apply}} + %1 = transform.structured.specialize %0 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +#map = affine_map<(d0, d1) -> (d0, d1)> + +func.func @specialize_trivial_copy_memref(%arg0: memref, %arg1: memref) { + linalg.generic { + indexing_maps = [#map, #map], 
+ iterator_types = ["parallel", "parallel"]} + ins(%arg0 : memref) outs(%arg1 : memref) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } + return +} + +// CHECK-LABEL: specialize_trivial_copy_memref +// CHECK-SAME: %[[ARG0:.+]]: memref, %[[ARG1:.+]]: memref +// CHECK-NOT: linalg.generic +// CHECK: linalg.copy ins(%[[ARG0]] : memref) outs(%[[ARG1]] : memref) + +#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> + +func.func @specialize_trivial_copy_tensor(%arg0: tensor, + %arg1: tensor) -> tensor { + %0 = linalg.generic { + indexing_maps = [#map1, #map1], + iterator_types = ["parallel", "parallel", "parallel"]} + ins(%arg0 : tensor) outs(%arg1 : tensor) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor + return %0 : tensor +} + +// CHECK-LABEL: specialize_trivial_copy_tensor +// CHECK-SAME: %[[ARG0:.+]]: tensor, %[[ARG1:.+]]: tensor +// CHECK-NOT: linalg.generic +// CHECK: %{{.+}} = linalg.copy ins(%[[ARG0]] : tensor) outs(%[[ARG1]] : tensor) + +func.func @already_trivial_copy_memref(%arg0: memref, %arg1: memref) { + linalg.copy ins(%arg0: memref) outs(%arg1: memref) + return +} + +// CHECK-LABEL: already_trivial_copy_memref +// CHECK-SAME: %[[ARG0:.+]]: memref, %[[ARG1:.+]]: memref +// CHECK: linalg.copy ins(%[[ARG0]] : memref) outs(%[[ARG1]] : memref) + +func.func @already_trivial_copy_tensor(%arg0: tensor, + %arg1: tensor) -> tensor { + %0 = linalg.copy ins(%arg0: tensor) outs(%arg1: tensor) -> tensor + return %0 : tensor +} + +// CHECK-LABEL: already_trivial_copy_tensor +// CHECK-SAME: %[[ARG0:.+]]: tensor, %[[ARG1:.+]]: tensor +// CHECK: %{{.+}} = linalg.copy ins(%[[ARG0]] : tensor) outs(%[[ARG1]] : tensor) + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match interface{LinalgOp} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.structured.specialize %0 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} From 61b9176cf70444c54f3ac6eebd82fc9ffd69944d Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 31 Oct 2023 09:11:50 +0000 Subject: [PATCH 138/144] [llvm][TableGen] Add Compiler Explorer link to README As Compiler Explorer now has trunk llvm-tblgen available. --- llvm/utils/TableGen/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/TableGen/README.md b/llvm/utils/TableGen/README.md index e19701acfce63a..3bee6555566a61 100644 --- a/llvm/utils/TableGen/README.md +++ b/llvm/utils/TableGen/README.md @@ -23,6 +23,7 @@ def HelloWorld { // Hello string msg = "Hello world!"; } ``` +[Try this example on Compiler Explorer.](https://godbolt.org/z/13xo1P5oz) The internalized records are passed on to various backends, which extract information from a subset of the records and generate one or more output files. From 33b85867e30e1adc2ff2173039c199b81c10f52b Mon Sep 17 00:00:00 2001 From: Ying Yi Date: Fri, 1 Sep 2023 15:30:44 +0100 Subject: [PATCH 139/144] Add two time-trace scope variables. A time trace scope variable of `ParseDeclarationOrFunctionDefinition` with the function's source location is added to record the time spent parsing the function's declaration or definition. Another time trace scope variable of `ParseFunctionDefinition` is also added to record the name of the defined function. A release note is added as well. 
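
For illustration only (the function name "foo" comes from the new driver
test below, not from a fixed format): compiling with `clang++ -S
-ftime-trace` now yields trace events such as "name":
"ParseFunctionDefinition" with "detail" naming the parsed function.
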
Reviewed by: Aaron Ballman Pull request: #65268 --- clang/docs/ReleaseNotes.rst | 8 ++++ clang/lib/Parse/Parser.cpp | 14 +++++- ...e-ParseDeclarationOrFunctionDefinition.cpp | 15 +++++++ clang/unittests/Support/TimeProfilerTest.cpp | 44 +++++++++++-------- 4 files changed, 62 insertions(+), 19 deletions(-) create mode 100644 clang/test/Driver/check-time-trace-ParseDeclarationOrFunctionDefinition.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index bc28bb567f6932..c151bd9d234b51 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -427,6 +427,14 @@ Improvements to Clang's diagnostics (or, more commonly, ``NULL`` when the platform defines it as ``__null``) to be more consistent with GCC. +Improvements to Clang's time-trace +---------------------------------- +- Two time-trace scope variables are added. A time trace scope variable of + ``ParseDeclarationOrFunctionDefinition`` with the function's source location + is added to record the time spent parsing the function's declaration or + definition. Another time trace scope variable of ``ParseFunctionDefinition`` + is also added to record the name of the defined function. + Bug Fixes in This Version ------------------------- - Fixed an issue where a class template specialization whose declaration is diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 0f930248e77174..bef3a0dcb285ef 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -13,8 +13,8 @@ #include "clang/Parse/Parser.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" -#include "clang/AST/DeclTemplate.h" #include "clang/AST/ASTLambda.h" +#include "clang/AST/DeclTemplate.h" #include "clang/Basic/FileManager.h" #include "clang/Parse/ParseDiagnostic.h" #include "clang/Parse/RAIIObjectsForParser.h" @@ -22,6 +22,7 @@ #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TimeProfiler.h" using namespace clang; @@ -1229,6 +1230,13 @@ Parser::DeclGroupPtrTy Parser::ParseDeclOrFunctionDefInternal( Parser::DeclGroupPtrTy Parser::ParseDeclarationOrFunctionDefinition( ParsedAttributes &Attrs, ParsedAttributes &DeclSpecAttrs, ParsingDeclSpec *DS, AccessSpecifier AS) { + // Add an enclosing time trace scope for a bunch of small scopes with + // "EvaluateAsConstExpr". + llvm::TimeTraceScope TimeScope( + "ParseDeclarationOrFunctionDefinition", + Tok.getLocation().printToString( + Actions.getASTContext().getSourceManager())); + if (DS) { return ParseDeclOrFunctionDefInternal(Attrs, DeclSpecAttrs, *DS, AS); } else { @@ -1259,6 +1267,10 @@ Parser::DeclGroupPtrTy Parser::ParseDeclarationOrFunctionDefinition( Decl *Parser::ParseFunctionDefinition(ParsingDeclarator &D, const ParsedTemplateInfo &TemplateInfo, LateParsedAttrList *LateParsedAttrs) { + llvm::TimeTraceScope TimeScope( + "ParseFunctionDefinition", + Actions.GetNameForDeclarator(D).getName().getAsString()); + // Poison SEH identifiers so they are flagged as illegal in function bodies. 
PoisonSEHIdentifiersRAIIObject PoisonSEHIdentifiers(*this, true); const DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo(); diff --git a/clang/test/Driver/check-time-trace-ParseDeclarationOrFunctionDefinition.cpp b/clang/test/Driver/check-time-trace-ParseDeclarationOrFunctionDefinition.cpp new file mode 100644 index 00000000000000..f854cddadbfcc1 --- /dev/null +++ b/clang/test/Driver/check-time-trace-ParseDeclarationOrFunctionDefinition.cpp @@ -0,0 +1,15 @@ +// RUN: %clangxx -S -ftime-trace -ftime-trace-granularity=0 -o %T/check-time-trace-ParseDeclarationOrFunctionDefinition %s +// RUN: cat %T/check-time-trace-ParseDeclarationOrFunctionDefinition.json \ +// RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ +// RUN: | FileCheck %s + +// CHECK-DAG: "name": "ParseDeclarationOrFunctionDefinition" +// CHECK-DAG: "detail": "{{.*}}check-time-trace-ParseDeclarationOrFunctionDefinition.cpp:15:1" +// CHECK-DAG: "name": "ParseFunctionDefinition" +// CHECK-DAG: "detail": "foo" +// CHECK-DAG: "name": "ParseFunctionDefinition" +// CHECK-DAG: "detail": "bar" + +template +void foo(T) {} +void bar() { foo(0); } diff --git a/clang/unittests/Support/TimeProfilerTest.cpp b/clang/unittests/Support/TimeProfilerTest.cpp index a7ca2bf91e474e..97fdbb7232b135 100644 --- a/clang/unittests/Support/TimeProfilerTest.cpp +++ b/clang/unittests/Support/TimeProfilerTest.cpp @@ -177,22 +177,29 @@ constexpr int slow_init_list[] = {1, 1, 2, 3, 5, 8, 13, 21}; // 25th line std::string TraceGraph = buildTraceGraph(Json); ASSERT_TRUE(TraceGraph == R"( Frontend -| EvaluateAsRValue () -| EvaluateForOverflow () -| EvaluateForOverflow () -| EvaluateAsRValue () -| EvaluateForOverflow () -| isPotentialConstantExpr (slow_namespace::slow_func) -| EvaluateAsBooleanCondition () -| | EvaluateAsRValue () -| EvaluateAsBooleanCondition () -| | EvaluateAsRValue () -| EvaluateAsInitializer (slow_value) -| EvaluateAsConstantExpr () -| EvaluateAsConstantExpr () -| EvaluateAsConstantExpr () -| EvaluateAsRValue () -| EvaluateAsInitializer (slow_init_list) +| ParseDeclarationOrFunctionDefinition (test.cc:2:1) +| ParseDeclarationOrFunctionDefinition (test.cc:6:1) +| | ParseFunctionDefinition (slow_func) +| | | EvaluateAsRValue () +| | | EvaluateForOverflow () +| | | EvaluateForOverflow () +| | | EvaluateAsRValue () +| | | EvaluateForOverflow () +| | | isPotentialConstantExpr (slow_namespace::slow_func) +| | | EvaluateAsBooleanCondition () +| | | | EvaluateAsRValue () +| | | EvaluateAsBooleanCondition () +| | | | EvaluateAsRValue () +| ParseDeclarationOrFunctionDefinition (test.cc:16:1) +| | ParseFunctionDefinition (slow_test) +| | | EvaluateAsInitializer (slow_value) +| | | EvaluateAsConstantExpr () +| | | EvaluateAsConstantExpr () +| ParseDeclarationOrFunctionDefinition (test.cc:22:1) +| | EvaluateAsConstantExpr () +| | EvaluateAsRValue () +| ParseDeclarationOrFunctionDefinition (test.cc:25:1) +| | EvaluateAsInitializer (slow_init_list) | PerformPendingInstantiations )"); @@ -213,8 +220,9 @@ struct { std::string TraceGraph = buildTraceGraph(Json); ASSERT_TRUE(TraceGraph == R"( Frontend -| isIntegerConstantExpr () -| EvaluateKnownConstIntCheckOverflow () +| ParseDeclarationOrFunctionDefinition (test.c:2:1) +| | isIntegerConstantExpr () +| | EvaluateKnownConstIntCheckOverflow () | PerformPendingInstantiations )"); From 75881dbb0fa5dcfe08518b6fb72621cbf60f45e2 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 31 Oct 2023 10:20:07 +0100 Subject: [PATCH 140/144] 
[JumpThreading] Don't phi translate past loop phi (#70664)

When evaluating comparisons in predecessors, phi operands are translated
into the predecessor. If the translation is across a backedge, this
means that the two operands of the icmp will be from two different loop
iterations, resulting in incorrect simplification.

Fix this by not performing the phi translation for phis in loop headers.

Note: This is not a complete fix. If the
jump-threading-across-loop-headers option is enabled, the LoopHeaders
variable does not get populated. Additional changes will be needed to
fix that case.

Related to https://github.com/llvm/llvm-project/issues/70651.
---
 llvm/lib/Transforms/Scalar/JumpThreading.cpp  |  5 ++++-
 llvm/test/Transforms/JumpThreading/pr70651.ll | 19 +++++++++++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index f2b9d784ead8af..7a8128c5b6c090 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -761,7 +761,10 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
     PHINode *PN = dyn_cast<PHINode>(CmpLHS);
     if (!PN)
       PN = dyn_cast<PHINode>(CmpRHS);
-    if (PN && PN->getParent() == BB) {
+    // Do not perform phi translation across a loop header phi, because this
+    // may result in comparison of values from two different loop iterations.
+    // FIXME: This check is broken if LoopHeaders is not populated.
+    if (PN && PN->getParent() == BB && !LoopHeaders.contains(BB)) {
       const DataLayout &DL = PN->getModule()->getDataLayout();
       // We can do this simplification if any comparisons fold to true or false.
       // See if any do.
diff --git a/llvm/test/Transforms/JumpThreading/pr70651.ll b/llvm/test/Transforms/JumpThreading/pr70651.ll
index a156be541874a6..2c6059bfdb3d56 100644
--- a/llvm/test/Transforms/JumpThreading/pr70651.ll
+++ b/llvm/test/Transforms/JumpThreading/pr70651.ll
@@ -1,7 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 ; RUN: opt -S -passes=jump-threading < %s | FileCheck %s
+; RUN: opt -S -passes=jump-threading -jump-threading-across-loop-headers < %s | FileCheck %s --check-prefix=THREAD-LOOP
 
-; FIXME: This is a miscompile.
+; FIXME: This is a miscompile if -jump-threading-across-loop-headers is enabled.
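+; (Why: phi-translating %sum across the backedge makes the icmp compare
+; values from two different loop iterations, so folding the exit condition
+; down to %v.nonneg is unsound.)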
define i64 @test(i64 %v) { ; CHECK-LABEL: define i64 @test( ; CHECK-SAME: i64 [[V:%.*]]) { @@ -12,10 +13,24 @@ define i64 @test(i64 %v) { ; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[V]] ; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ult i64 [[SUM_NEXT]], [[SUM]] -; CHECK-NEXT: br i1 [[V_NONNEG]], label [[FOR_BODY]], label [[EXIT:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = xor i1 [[V_NONNEG]], [[OVERFLOW]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret i64 [[SUM]] ; +; THREAD-LOOP-LABEL: define i64 @test( +; THREAD-LOOP-SAME: i64 [[V:%.*]]) { +; THREAD-LOOP-NEXT: entry: +; THREAD-LOOP-NEXT: [[V_NONNEG:%.*]] = icmp sgt i64 [[V]], -1 +; THREAD-LOOP-NEXT: br label [[FOR_BODY:%.*]] +; THREAD-LOOP: for.body: +; THREAD-LOOP-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ] +; THREAD-LOOP-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[V]] +; THREAD-LOOP-NEXT: [[OVERFLOW:%.*]] = icmp ult i64 [[SUM_NEXT]], [[SUM]] +; THREAD-LOOP-NEXT: br i1 [[V_NONNEG]], label [[FOR_BODY]], label [[EXIT:%.*]] +; THREAD-LOOP: exit: +; THREAD-LOOP-NEXT: ret i64 [[SUM]] +; entry: %v.nonneg = icmp sgt i64 %v, -1 br label %for.body From 83bf8e9a12719c8166f77615440c275f136f4066 Mon Sep 17 00:00:00 2001 From: NimishMishra <42909663+NimishMishra@users.noreply.github.com> Date: Tue, 31 Oct 2023 02:34:37 -0700 Subject: [PATCH 141/144] [flang][OpenMP] Port OpenMP FIR tests for atomic update/capture to HLFIR (#70627) Port atomic update and capture tests to HLFIR. --- flang/test/Lower/OpenMP/atomic-capture.f90 | 98 ++++++++++++ .../test/Lower/OpenMP/atomic-update-hlfir.f90 | 23 --- flang/test/Lower/OpenMP/atomic-update.f90 | 149 ++++++++++++++++++ 3 files changed, 247 insertions(+), 23 deletions(-) create mode 100644 flang/test/Lower/OpenMP/atomic-capture.f90 delete mode 100644 flang/test/Lower/OpenMP/atomic-update-hlfir.f90 create mode 100644 flang/test/Lower/OpenMP/atomic-update.f90 diff --git a/flang/test/Lower/OpenMP/atomic-capture.f90 b/flang/test/Lower/OpenMP/atomic-capture.f90 new file mode 100644 index 00000000000000..cde0281dbdc849 --- /dev/null +++ b/flang/test/Lower/OpenMP/atomic-capture.f90 @@ -0,0 +1,98 @@ +! This test checks the lowering of atomic capture + +! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s +! 
RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s + + + +program OmpAtomicCapture + use omp_lib + +!CHECK: %[[VAL_X_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} +!CHECK: %[[VAL_X_DECLARE:.*]]:2 = hlfir.declare %[[VAL_X_ALLOCA]] {{.*}} +!CHECK: %[[VAL_Y_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"} +!CHECK: %[[VAL_Y_DECLARE:.*]]:2 = hlfir.declare %[[VAL_Y_ALLOCA]] {{.*}} + integer :: x, y + +!CHECK: %[[VAL_Y_LOADED:.*]] = fir.load %[[VAL_X_DECLARE]]#0 : !fir.ref +!CHECK: omp.atomic.capture hint(uncontended) { +!CHECK: omp.atomic.update %[[VAL_Y_DECLARE]]#1 : !fir.ref { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[TEMP:.*]] = arith.muli %[[VAL_Y_LOADED]], %[[ARG]] : i32 +!CHECK: omp.yield(%[[TEMP]] : i32) +!CHECK: } +!CHECK: omp.atomic.read %[[VAL_X_DECLARE]]#1 = %[[VAL_Y_DECLARE]]#1 : !fir.ref, i32 +!CHECK: } + !$omp atomic hint(omp_sync_hint_uncontended) capture + y = x * y + x = y + !$omp end atomic + +!CHECK: %[[VAL_20:.*]] = arith.constant 20 : i32 +!CHECK: %[[VAL_8:.*]] = arith.constant 8 : i32 +!CHECK: %[[VAL_X_LOADED:.*]] = fir.load %[[VAL_X_DECLARE]]#0 : !fir.ref +!CHECK: %[[SUB:.*]] = arith.subi %[[VAL_8]], %[[VAL_X_LOADED]] : i32 +!CHECK: %[[NO_REASSOC:.*]] = hlfir.no_reassoc %[[SUB]] : i32 +!CHECK: %[[ADD:.*]] = arith.addi %[[VAL_20]], %[[NO_REASSOC]] : i32 +!CHECK: omp.atomic.capture memory_order(acquire) hint(nonspeculative) { +!CHECK: omp.atomic.read %[[VAL_X_DECLARE]]#1 = %[[VAL_Y_DECLARE]]#1 : !fir.ref, i32 +!CHECK: omp.atomic.write %[[VAL_Y_DECLARE]]#1 = %[[ADD]] : !fir.ref, i32 +!CHECK: } +!CHECK: return +!CHECK: } + !$omp atomic hint(omp_lock_hint_nonspeculative) capture acquire + x = y + y = 2 * 10 + (8 - x) + !$omp end atomic +end program + + +!CHECK: func.func @_QPpointers_in_atomic_capture() { +subroutine pointers_in_atomic_capture() + +!CHECK: %[[VAL_A_ALLOCA:.*]] = fir.alloca !fir.box> {bindc_name = "a", uniq_name = "_QFpointers_in_atomic_captureEa"} +!CHECK: %[[ZERO:.*]] = fir.zero_bits !fir.ptr +!CHECK: %[[EMBOX:.*]] = fir.embox %[[ZERO]] : (!fir.ptr) -> !fir.box> +!CHECK: fir.store %[[EMBOX]] to %[[VAL_A_ALLOCA]] : !fir.ref>> +!CHECK: %[[VAL_A_DECLARE:.*]]:2 = hlfir.declare %[[VAL_A_ALLOCA]] {{.*}} +!CHECK: %[[VAL_B_ALLOCA:.*]] = fir.alloca !fir.box> {bindc_name = "b", uniq_name = "_QFpointers_in_atomic_captureEb"} +!CHECK: %[[ZERO:.*]] = fir.zero_bits !fir.ptr +!CHECK: %[[EMBOX:.*]] = fir.embox %[[ZERO]] : (!fir.ptr) -> !fir.box> +!CHECK: fir.store %[[EMBOX]] to %[[VAL_B_ALLOCA]] : !fir.ref>> +!CHECK: %[[VAL_B_DECLARE:.*]]:2 = hlfir.declare %[[VAL_B_ALLOCA]] {{.*}} +!CHECK: %[[VAL_C_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "c", fir.target, uniq_name = "_QFpointers_in_atomic_captureEc"} +!CHECK: %[[VAL_C_DECLARE:.*]]:2 = hlfir.declare %[[VAL_C_ALLOCA]] {{.*}} +!CHECK: %[[VAL_D_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "d", fir.target, uniq_name = "_QFpointers_in_atomic_captureEd"} +!CHECK: %[[VAL_D_DECLARE:.*]]:2 = hlfir.declare %[[VAL_D_ALLOCA]] {{.*}} + integer, pointer :: a, b + integer, target :: c, d + +!CHECK: %[[EMBOX:.*]] = fir.embox %[[VAL_C_DECLARE]]#1 : (!fir.ref) -> !fir.box> +!CHECK: fir.store %[[EMBOX]] to %[[VAL_A_DECLARE]]#1 : !fir.ref>> +!CHECK: %[[EMBOX:.*]] = fir.embox %[[VAL_D_DECLARE]]#1 : (!fir.ref) -> !fir.box> +!CHECK: fir.store %[[EMBOX]] to %[[VAL_B_DECLARE]]#1 : !fir.ref>> + a=>c + b=>d + +!CHECK: %[[VAL_A_LOADED:.*]] = fir.load %[[VAL_A_DECLARE]]#0 : !fir.ref>> +!CHECK: %[[VAL_A_BOX_ADDR:.*]] = fir.box_addr %[[VAL_A_LOADED]] : (!fir.box>) -> !fir.ptr +!CHECK: 
%[[VAL_B_LOADED:.*]] = fir.load %[[VAL_B_DECLARE]]#0 : !fir.ref>> +!CHECK: %[[VAL_B_BOX_ADDR:.*]] = fir.box_addr %[[VAL_B_LOADED]] : (!fir.box>) -> !fir.ptr +!CHECK: %[[VAL_B_LOADED_2:.*]] = fir.load %[[VAL_B_DECLARE]]#0 : !fir.ref>> +!CHECK: %[[VAL_B_BOX_ADDR_2:.*]] = fir.box_addr %[[VAL_B_LOADED_2]] : (!fir.box>) -> !fir.ptr +!CHECK: %[[VAL_B:.*]] = fir.load %[[VAL_B_BOX_ADDR_2]] : !fir.ptr +!CHECK: omp.atomic.capture { +!CHECK: omp.atomic.update %[[VAL_A_BOX_ADDR]] : !fir.ptr { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[TEMP:.*]] = arith.addi %[[ARG]], %[[VAL_B]] : i32 +!CHECK: omp.yield(%[[TEMP]] : i32) +!CHECK: } +!CHECK: omp.atomic.read %[[VAL_B_BOX_ADDR]] = %[[VAL_A_BOX_ADDR]] : !fir.ptr, i32 +!CHECK: } +!CHECK: return +!CHECK: } + !$omp atomic capture + a = a + b + b = a + !$omp end atomic +end subroutine diff --git a/flang/test/Lower/OpenMP/atomic-update-hlfir.f90 b/flang/test/Lower/OpenMP/atomic-update-hlfir.f90 deleted file mode 100644 index 329009ab8ef8e9..00000000000000 --- a/flang/test/Lower/OpenMP/atomic-update-hlfir.f90 +++ /dev/null @@ -1,23 +0,0 @@ -! This test checks lowering of atomic and atomic update constructs with HLFIR -! RUN: bbc -hlfir -fopenmp -emit-hlfir %s -o - | FileCheck %s -! RUN: %flang_fc1 -flang-experimental-hlfir -emit-hlfir -fopenmp %s -o - | FileCheck %s - -subroutine sb - integer :: x, y - - !$omp atomic update - x = x + y -end subroutine - -!CHECK-LABEL: @_QPsb -!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsbEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFsbEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[Y_REF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFsbEy"} -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_REF]] {uniq_name = "_QFsbEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[Y_VAL:.*]] = fir.load %[[Y_DECL]]#0 : !fir.ref -!CHECK: omp.atomic.update %[[X_DECL]]#1 : !fir.ref { -!CHECK: ^bb0(%[[ARG_X:.*]]: i32): -!CHECK: %[[X_UPDATE_VAL:.*]] = arith.addi %[[ARG_X]], %[[Y_VAL]] : i32 -!CHECK: omp.yield(%[[X_UPDATE_VAL]] : i32) -!CHECK: } -!CHECK: return diff --git a/flang/test/Lower/OpenMP/atomic-update.f90 b/flang/test/Lower/OpenMP/atomic-update.f90 new file mode 100644 index 00000000000000..e6319f70f37365 --- /dev/null +++ b/flang/test/Lower/OpenMP/atomic-update.f90 @@ -0,0 +1,149 @@ +! This test checks lowering of atomic and atomic update constructs +! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s +! 
RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s + +program OmpAtomicUpdate + use omp_lib +!CHECK: %[[VAL_A:.*]] = fir.alloca !fir.box> {bindc_name = "a", uniq_name = "_QFEa"} +!CHECK: %[[ZERO:.*]] = fir.zero_bits !fir.ptr +!CHECK: %[[EMBOX:.*]] = fir.embox %[[ZERO]] : (!fir.ptr) -> !fir.box> +!CHECK: fir.store %[[EMBOX]] to %[[VAL_A]] : !fir.ref>> +!CHECK: %[[VAL_A_DECLARE:.*]]:2 = hlfir.declare %[[VAL_A]] {{.*}} +!CHECK: %[[VAL_B:.*]] = fir.alloca !fir.box> {bindc_name = "b", uniq_name = "_QFEb"} +!CHECK: %[[ZERO:.*]] = fir.zero_bits !fir.ptr +!CHECK: %[[EMBOX:.*]] = fir.embox %[[ZERO]] : (!fir.ptr) -> !fir.box> +!CHECK: fir.store %[[EMBOX]] to %[[VAL_B]] : !fir.ref>> +!CHECK: %[[VAL_B_DECLARE:.*]]:2 = hlfir.declare %[[VAL_B]] {{.*}} +!CHECK: %[[VAL_C_ADDRESS:.*]] = fir.address_of(@_QFEc) : !fir.ref +!CHECK: %[[VAL_C_DECLARE:.*]]:2 = hlfir.declare %[[VAL_C_ADDRESS]] {{.*}} +!CHECK: %[[VAL_D_ADDRESS:.*]] = fir.address_of(@_QFEd) : !fir.ref +!CHECK: %[[VAL_D_DECLARE:.*]]:2 = hlfir.declare %[[VAL_D_ADDRESS]] {{.}} +!CHECK: %[[VAL_i1_ALLOCA:.*]] = fir.alloca i8 {bindc_name = "i1", uniq_name = "_QFEi1"} +!CHECK: %[[VAL_i1_DECLARE:.*]]:2 = hlfir.declare %[[VAL_i1_ALLOCA]] {{.*}} +!CHECK: %[[VAL_c5:.*]] = arith.constant 5 : index +!CHECK: %[[VAL_K_ALLOCA:.*]] = fir.alloca !fir.array<5xi32> {bindc_name = "k", uniq_name = "_QFEk"} +!CHECK: %[[VAL_K_SHAPED:.*]] = fir.shape %[[VAL_c5]] : (index) -> !fir.shape<1> +!CHECK: %[[VAL_K_DECLARE:.*]]:2 = hlfir.declare %[[VAL_K_ALLOCA]](%[[VAL_K_SHAPED]]) {{.*}} + +!CHECK: %[[VAL_X_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"} +!CHECK: %[[VAL_X_DECLARE:.*]]:2 = hlfir.declare %[[VAL_X_ALLOCA]] {uniq_name = "_QFEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[VAL_Y_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"} +!CHECK: %[[VAL_Y_DECLARE:.*]]:2 = hlfir.declare %[[VAL_Y_ALLOCA]] {uniq_name = "_QFEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[VAL_Z_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFEz"} +!CHECK: %[[VAL_Z_DECLARE:.*]]:2 = hlfir.declare %[[VAL_Z_ALLOCA]] {uniq_name = "_QFEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) + integer :: x, y, z + integer, pointer :: a, b + integer, target :: c, d + integer(1) :: i1 + integer, dimension(5) :: k + +!CHECK: %[[EMBOX:.*]] = fir.embox %[[VAL_C_DECLARE]]#1 : (!fir.ref) -> !fir.box> +!CHECK: fir.store %[[EMBOX]] to %[[VAL_A_DECLARE]]#1 : !fir.ref>> +!CHECK: %[[EMBOX:.*]] = fir.embox %[[VAL_D_DECLARE]]#1 : (!fir.ref) -> !fir.box> +!CHECK: fir.store %[[EMBOX]] to %[[VAL_B_DECLARE]]#1 : !fir.ref>> + a=>c + b=>d + +!CHECK: %[[VAL_c3:.*]] = arith.constant 3 : index +!CHECK: %[[VAL_K_DESIGNATE:.*]] = hlfir.designate %[[VAL_K_DECLARE]]#0 (%[[VAL_c3]]) : (!fir.ref>, index) -> !fir.ref +!CHECK: %[[LOADED_Z:.*]] = fir.load %[[VAL_Z_DECLARE]]#0 : !fir.ref +!CHECK: omp.atomic.update %[[VAL_K_DESIGNATE]] : !fir.ref { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[TEMP:.*]] = arith.muli %[[LOADED_Z]], %[[ARG]] : i32 +!CHECK: omp.yield(%[[TEMP]] : i32) +!CHECK: } + !$omp atomic update + k(3) = z * k(3) + +!CHECK: %[[VAL_A_LOADED:.*]] = fir.load %[[VAL_A_DECLARE]]#0 : !fir.ref>> +!CHECK: %[[VAL_A_BOX_ADDR:.*]] = fir.box_addr %[[VAL_A_LOADED]] : (!fir.box>) -> !fir.ptr +!CHECK: %[[VAL_B_LOADED:.*]] = fir.load %[[VAL_B_DECLARE]]#0 : !fir.ref>> +!CHECK: %[[VAL_B_BOX_ADDR:.*]] = fir.box_addr %[[VAL_B_LOADED]] : (!fir.box>) -> !fir.ptr +!CHECK: %[[VAL_B:.*]] = fir.load %[[VAL_B_BOX_ADDR]] : !fir.ptr +!CHECK: omp.atomic.update %[[VAL_A_BOX_ADDR]] : !fir.ptr { 
+!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[TEMP:.*]] = arith.addi %[[ARG]], %[[VAL_B]] : i32 +!CHECK: omp.yield(%[[TEMP]] : i32) +!CHECK: } + !$omp atomic update + a = a + b + +!CHECK: %[[VAL_c1:.*]] = arith.constant 1 : i32 +!CHECK: omp.atomic.update %[[VAL_Y_DECLARE]]#1 : !fir.ref { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[TEMP:.*]] = arith.addi %[[ARG]], %[[VAL_c1]] : i32 +!CHECK: omp.yield(%[[TEMP]] : i32) +!CHECK: } + !$omp atomic + y = y + 1 + +!CHECK: %[[VAL_X_LOADED:.*]] = fir.load %[[VAL_X_DECLARE]]#0 : !fir.ref +!CHECK: omp.atomic.update %[[VAL_Z_DECLARE]]#1 : !fir.ref { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[TEMP:.*]] = arith.muli %[[VAL_X_LOADED]], %[[ARG]] : i32 +!CHECK: omp.yield(%[[TEMP]] : i32) +!CHECK: } + !$omp atomic update + z = x * z + +!CHECK: %[[VAL_c1:.*]] = arith.constant 1 : i32 +!CHECK: omp.atomic.update memory_order(relaxed) hint(uncontended) %[[VAL_X_DECLARE]]#1 : !fir.ref { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[TEMP:.*]] = arith.subi %[[ARG]], %[[VAL_c1]] : i32 +!CHECK: omp.yield(%[[TEMP]] : i32) +!CHECK: } + !$omp atomic relaxed update hint(omp_sync_hint_uncontended) + x = x - 1 + +!CHECK: omp.atomic.update memory_order(relaxed) %[[VAL_Y_DECLARE]]#1 : !fir.ref { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[VAL_C_LOADED:.*]] = fir.load %[[VAL_C_DECLARE]]#0 : !fir.ref +!CHECK: %[[VAL_D_LOADED:.*]] = fir.load %[[VAL_D_DECLARE]]#0 : !fir.ref +!CHECK: {{.*}} = arith.cmpi sgt, %[[ARG]], {{.*}} : i32 +!CHECK: {{.*}} = arith.select {{.*}}, %[[ARG]], {{.*}} : i32 +!CHECK: {{.*}} = arith.cmpi sgt, {{.*}} +!CHECK: %[[TEMP:.*]] = arith.select {{.*}} : i32 +!CHECK: omp.yield(%[[TEMP]] : i32) +!CHECK: } + !$omp atomic update relaxed + y = max(y, c, d) + +!CHECK: %[[VAL_X_LOADED:.*]] = fir.load %[[VAL_X_DECLARE]]#0 : !fir.ref +!CHECK: omp.atomic.update memory_order(relaxed) hint(contended) %[[VAL_Z_DECLARE]]#1 : !fir.ref { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[TEMP:.*]] = arith.addi %[[ARG]], %[[VAL_X_LOADED]] : i32 +!CHECK: omp.yield(%[[TEMP]] : i32) +!CHECK: } + !$omp atomic relaxed hint(omp_sync_hint_contended) + z = z + x + +!CHECK: %[[VAL_c10:.*]] = arith.constant 10 : i32 +!CHECK: omp.atomic.update memory_order(release) hint(contended) %[[VAL_Z_DECLARE]]#1 : !fir.ref { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[TEMP:.*]] = arith.muli %[[VAL_c10]], %[[ARG]] : i32 +!CHECK: omp.yield(%[[TEMP]] : i32) +!CHECK: } + !$omp atomic release update hint(omp_lock_hint_contended) + z = z * 10 + +!CHECK: %[[VAL_Z_LOADED:.*]] = fir.load %[[VAL_Z_DECLARE]]#0 : !fir.ref +!CHECK: omp.atomic.update memory_order(release) hint(speculative) %[[VAL_X_DECLARE]]#1 : !fir.ref { +!CHECK: ^bb0(%[[ARG:.*]]: i32): +!CHECK: %[[TEMP:.*]] = arith.divsi %[[ARG]], %[[VAL_Z_LOADED]] : i32 +!CHECK: omp.yield(%[[TEMP]] : i32) +!CHECK: } + !$omp atomic hint(omp_lock_hint_speculative) update release + x = x / z + +!CHECK: %[[VAL_c1:.*]] = arith.constant 1 : i32 +!CHECK: omp.atomic.update %[[VAL_i1_DECLARE]]#1 : !fir.ref { +!CHECK: ^bb0(%[[ARG:.*]]: i8): +!CHECK: %[[CONVERT:.*]] = fir.convert %[[ARG]] : (i8) -> i32 +!CHECK: %[[ADD:.*]] = arith.addi %[[CONVERT]], %[[VAL_c1]] : i32 +!CHECK: %[[TEMP:.*]] = fir.convert %[[ADD]] : (i32) -> i8 +!CHECK: omp.yield(%[[TEMP]] : i8) +!CHECK: } + !$omp atomic + i1 = i1 + 1 + !$omp end atomic +end program OmpAtomicUpdate From 04736c7f7ae97bf9dc0d7ebaefe80990d5c17d65 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Tue, 31 Oct 2023 18:36:40 +0900 Subject: [PATCH 142/144] [mlir][SCF] Use `transform.get_parent_op` instead of 
`transform.loop.get_parent_for` (#70757) Add a new attribute to `get_parent_op` to get the n-th parent. Remove `transform.loop.get_parent_for`, which is no longer needed. --- .../SCF/TransformOps/SCFTransformOps.td | 24 ---- .../mlir/Dialect/Transform/IR/TransformOps.td | 11 +- .../SCF/TransformOps/SCFTransformOps.cpp | 33 ------ .../lib/Dialect/Transform/IR/TransformOps.cpp | 41 ++++--- .../mlir/dialects/transform/__init__.py | 42 +++---- mlir/python/mlir/dialects/transform/loop.py | 24 ---- .../Dialect/SCF/transform-ops-invalid.mlir | 4 +- mlir/test/Dialect/SCF/transform-ops.mlir | 108 +----------------- .../Dialect/Transform/test-interpreter.mlir | 28 ++++- mlir/test/python/dialects/transform.py | 7 +- .../python/dialects/transform_loop_ext.py | 15 --- 11 files changed, 91 insertions(+), 246 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td b/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td index 700a29139a35b1..14df7e23a430fb 100644 --- a/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td +++ b/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td @@ -68,30 +68,6 @@ def ForallToForOp : Op]> { - let summary = "Gets a handle to the parent 'for' loop of the given operation"; - let description = [{ - Produces a handle to the n-th (default 1) parent `scf.for` or `affine.for` - (when the affine flag is true) loop for each Payload IR operation - associated with the operand. Fails if such a loop cannot be found. The list - of operations associated with the handle contains parent operations in the - same order as the list associated with the operand, except for operations - that are parents to more than one input which are only present once. - }]; - - let arguments = - (ins TransformHandleTypeInterface:$target, - DefaultValuedAttr, - "1">:$num_loops, - DefaultValuedAttr:$affine); - let results = (outs TransformHandleTypeInterface : $parent); - - let assemblyFormat = - "$target attr-dict `:` functional-type(operands, results)"; -} - def LoopOutlineOp : Op]> { diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td index 2fd0e80db96feb..307257f4a582be 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td @@ -620,10 +620,11 @@ def GetParentOp : TransformDialectOp<"get_parent_op", that case for each target op, the closest parent op that fulfills all requirements, is returned. - `isolated_from_above`: the parent op must be isolated from above - - `allow_empty_results`: get_parent_op is allowed to return an empty list and - still succeeds. In such a case, if get_parent_op fails for any operation - in the list, the entire transform returns an empty handle. + - `allow_empty_results`: get_parent_op is allowed to return an empty list + and still succeeds. In such a case, if get_parent_op fails for any + operation in the list, the entire transform returns an empty handle. - `op_name`: the parent op must have the specified name + - `nth_parent`: get the n-th parent of that satisfies the above requirements If `deduplicate` is set, the result handle does not contain any duplicate ops. 
For example, given the list @@ -641,7 +642,9 @@ def GetParentOp : TransformDialectOp<"get_parent_op", UnitAttr:$isolated_from_above, UnitAttr:$allow_empty_results, OptionalAttr:$op_name, - UnitAttr:$deduplicate); + UnitAttr:$deduplicate, + DefaultValuedAttr, + "1">:$nth_parent); let results = (outs TransformHandleTypeInterface:$parent); let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)"; diff --git a/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp b/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp index 65d503d7c4ad8b..62370604142cd5 100644 --- a/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp +++ b/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp @@ -49,39 +49,6 @@ void transform::ApplySCFStructuralConversionPatternsOp:: conversionTarget); } -//===----------------------------------------------------------------------===// -// GetParentForOp -//===----------------------------------------------------------------------===// - -DiagnosedSilenceableFailure -transform::GetParentForOp::apply(transform::TransformRewriter &rewriter, - transform::TransformResults &results, - transform::TransformState &state) { - SetVector parents; - for (Operation *target : state.getPayloadOps(getTarget())) { - Operation *loop, *current = target; - for (unsigned i = 0, e = getNumLoops(); i < e; ++i) { - loop = getAffine() - ? current->getParentOfType().getOperation() - : current->getParentOfType().getOperation(); - if (!loop) { - DiagnosedSilenceableFailure diag = - emitSilenceableError() - << "could not find an '" - << (getAffine() ? AffineForOp::getOperationName() - : scf::ForOp::getOperationName()) - << "' parent"; - diag.attachNote(target->getLoc()) << "target op"; - return diag; - } - current = loop; - } - parents.insert(loop); - } - results.set(cast(getResult()), parents.getArrayRef()); - return DiagnosedSilenceableFailure::success(); -} - //===----------------------------------------------------------------------===// // ForallToForOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index 514a75b5d59046..7136e423470a28 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -1232,27 +1232,30 @@ transform::GetParentOp::apply(transform::TransformRewriter &rewriter, SmallVector parents; DenseSet resultSet; for (Operation *target : state.getPayloadOps(getTarget())) { - Operation *parent = target->getParentOp(); - while (parent) { - bool checkIsolatedFromAbove = - !getIsolatedFromAbove() || - parent->hasTrait(); - bool checkOpName = !getOpName().has_value() || - parent->getName().getStringRef() == *getOpName(); - if (checkIsolatedFromAbove && checkOpName) - break; + Operation *parent = target; + for (int64_t i = 0, e = getNthParent(); i < e; ++i) { parent = parent->getParentOp(); - } - if (!parent) { - if (getAllowEmptyResults()) { - results.set(llvm::cast(getResult()), parents); - return DiagnosedSilenceableFailure::success(); + while (parent) { + bool checkIsolatedFromAbove = + !getIsolatedFromAbove() || + parent->hasTrait(); + bool checkOpName = !getOpName().has_value() || + parent->getName().getStringRef() == *getOpName(); + if (checkIsolatedFromAbove && checkOpName) + break; + parent = parent->getParentOp(); + } + if (!parent) { + if (getAllowEmptyResults()) { + results.set(llvm::cast(getResult()), parents); + return DiagnosedSilenceableFailure::success(); + } 
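+        // No qualifying parent was found at this level; emit a silenceable
+        // failure that points at the offending payload op.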
+ DiagnosedSilenceableFailure diag = + emitSilenceableError() + << "could not find a parent op that matches all requirements"; + diag.attachNote(target->getLoc()) << "target op"; + return diag; } - DiagnosedSilenceableFailure diag = - emitSilenceableError() - << "could not find a parent op that matches all requirements"; - diag.attachNote(target->getLoc()) << "target op"; - return diag; } if (getDeduplicate()) { if (!resultSet.contains(parent)) { diff --git a/mlir/python/mlir/dialects/transform/__init__.py b/mlir/python/mlir/dialects/transform/__init__.py index f7a2026e800aeb..166c5c5ca4ec34 100644 --- a/mlir/python/mlir/dialects/transform/__init__.py +++ b/mlir/python/mlir/dialects/transform/__init__.py @@ -52,26 +52,28 @@ def patterns(self) -> Block: @_ods_cext.register_operation(_Dialect, replace=True) class GetParentOp(GetParentOp): - def __init__( - self, - result_type: Type, - target: Union[Operation, Value], - *, - isolated_from_above: bool = False, - op_name: Optional[str] = None, - deduplicate: bool = False, - loc=None, - ip=None, - ): - super().__init__( - result_type, - _get_op_result_or_value(target), - isolated_from_above=isolated_from_above, - op_name=op_name, - deduplicate=deduplicate, - loc=loc, - ip=ip, - ) + def __init__( + self, + result_type: Type, + target: Union[Operation, Value], + *, + isolated_from_above: bool = False, + op_name: Optional[str] = None, + deduplicate: bool = False, + nth_parent: int = 1, + loc=None, + ip=None, + ): + super().__init__( + result_type, + _get_op_result_or_value(target), + isolated_from_above=isolated_from_above, + op_name=op_name, + deduplicate=deduplicate, + nth_parent=nth_parent, + loc=loc, + ip=ip, + ) @_ods_cext.register_operation(_Dialect, replace=True) diff --git a/mlir/python/mlir/dialects/transform/loop.py b/mlir/python/mlir/dialects/transform/loop.py index 6c89025f413839..3bdd9ca3b22f07 100644 --- a/mlir/python/mlir/dialects/transform/loop.py +++ b/mlir/python/mlir/dialects/transform/loop.py @@ -17,30 +17,6 @@ from typing import Optional, Union -@_ods_cext.register_operation(_Dialect, replace=True) -class GetParentForOp(GetParentForOp): - """Extension for GetParentForOp.""" - - def __init__( - self, - result_type: Type, - target: Union[Operation, Value], - *, - num_loops: Optional[int] = None, - ip=None, - loc=None, - ): - if num_loops is None: - num_loops = 1 - super().__init__( - result_type, - _get_op_result_or_value(target), - num_loops=num_loops, - ip=ip, - loc=loc, - ) - - @_ods_cext.register_operation(_Dialect, replace=True) class LoopOutlineOp(LoopOutlineOp): """Extension for LoopOutlineOp.""" diff --git a/mlir/test/Dialect/SCF/transform-ops-invalid.mlir b/mlir/test/Dialect/SCF/transform-ops-invalid.mlir index 96c57d4716d375..59b824d4ca2620 100644 --- a/mlir/test/Dialect/SCF/transform-ops-invalid.mlir +++ b/mlir/test/Dialect/SCF/transform-ops-invalid.mlir @@ -32,7 +32,7 @@ func.func @test_loops_do_not_get_unrolled() { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.loop.get_parent_for %0 { affine = true } : (!transform.any_op) -> !transform.op<"affine.for"> + %1 = transform.get_parent_op %0 {op_name = "affine.for"} : (!transform.any_op) -> !transform.op<"affine.for"> // expected-error @below {{failed to unroll}} transform.loop.unroll %1 { factor = 8 } : !transform.op<"affine.for"> transform.yield @@ -81,7 
+81,7 @@ func.func @test_loops_do_not_get_peeled() { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.op<"scf.for"> + %1 = transform.get_parent_op %0 {op_name = "scf.for"} : (!transform.any_op) -> !transform.op<"scf.for"> // expected-error @below {{failed to peel}} transform.loop.peel %1 : (!transform.op<"scf.for">) -> (!transform.any_op, !transform.any_op) transform.yield diff --git a/mlir/test/Dialect/SCF/transform-ops.mlir b/mlir/test/Dialect/SCF/transform-ops.mlir index 6d1ba48d3b935b..74601cf5b34a17 100644 --- a/mlir/test/Dialect/SCF/transform-ops.mlir +++ b/mlir/test/Dialect/SCF/transform-ops.mlir @@ -1,53 +1,5 @@ // RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics | FileCheck %s -// CHECK-LABEL: @get_parent_for_op -func.func @get_parent_for_op(%arg0: index, %arg1: index, %arg2: index) { - // expected-remark @below {{first loop}} - scf.for %i = %arg0 to %arg1 step %arg2 { - // expected-remark @below {{second loop}} - scf.for %j = %arg0 to %arg1 step %arg2 { - // expected-remark @below {{third loop}} - scf.for %k = %arg0 to %arg1 step %arg2 { - arith.addi %i, %j : index - } - } - } - return -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op - // CHECK: = transform.loop.get_parent_for - %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.op<"scf.for"> - %2 = transform.loop.get_parent_for %0 { num_loops = 2 } : (!transform.any_op) -> !transform.op<"scf.for"> - %3 = transform.loop.get_parent_for %0 { num_loops = 3 } : (!transform.any_op) -> !transform.op<"scf.for"> - transform.test_print_remark_at_operand %1, "third loop" : !transform.op<"scf.for"> - transform.test_print_remark_at_operand %2, "second loop" : !transform.op<"scf.for"> - transform.test_print_remark_at_operand %3, "first loop" : !transform.op<"scf.for"> - transform.yield - } -} - -// ----- - -func.func @get_parent_for_op_no_loop(%arg0: index, %arg1: index) { - // expected-note @below {{target op}} - arith.addi %arg0, %arg1 : index - return -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op - // expected-error @below {{could not find an 'scf.for' parent}} - %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.op<"scf.for"> - transform.yield - } -} - -// ----- - // Outlined functions: // // CHECK: func @foo(%{{.+}}, %{{.+}}, %{{.+}}, %{{.+}}) @@ -81,7 +33,7 @@ func.func @loop_outline_op(%arg0: index, %arg1: index, %arg2: index) { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.op<"scf.for"> + %1 = transform.get_parent_op %0 {op_name = "scf.for"} : (!transform.any_op) -> !transform.op<"scf.for"> // CHECK: = 
transform.loop.outline %{{.*}} transform.loop.outline %1 {func_name = "foo"} : (!transform.op<"scf.for">) -> (!transform.any_op, !transform.any_op) transform.yield @@ -114,7 +66,7 @@ func.func @loop_peel_op() { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.op<"scf.for"> + %1 = transform.get_parent_op %0 {op_name = "scf.for"} : (!transform.any_op) -> !transform.op<"scf.for"> %main_loop, %remainder = transform.loop.peel %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">, !transform.op<"scf.for">) // Make sure transform.test_print_remark_at_operand %main_loop, "main loop" : !transform.op<"scf.for"> @@ -152,7 +104,7 @@ func.func @loop_pipeline_op(%A: memref, %result: memref) { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["arith.addf"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.op<"scf.for"> + %1 = transform.get_parent_op %0 {op_name = "scf.for"} : (!transform.any_op) -> !transform.op<"scf.for"> %2 = transform.loop.pipeline %1 : (!transform.op<"scf.for">) -> !transform.any_op // Verify that the returned handle is usable. transform.test_print_remark_at_operand %2, "transformed" : !transform.any_op @@ -178,7 +130,7 @@ func.func @loop_unroll_op() { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.op<"scf.for"> + %1 = transform.get_parent_op %0 {op_name = "scf.for"} : (!transform.any_op) -> !transform.op<"scf.for"> transform.loop.unroll %1 { factor = 4 } : !transform.op<"scf.for"> transform.yield } @@ -186,54 +138,6 @@ module attributes {transform.with_named_sequence} { // ----- -// CHECK-LABEL: @get_parent_for_op -func.func @get_parent_for_op(%arg0: index, %arg1: index, %arg2: index) { - // expected-remark @below {{first loop}} - affine.for %i = %arg0 to %arg1 { - // expected-remark @below {{second loop}} - affine.for %j = %arg0 to %arg1 { - // expected-remark @below {{third loop}} - affine.for %k = %arg0 to %arg1 { - arith.addi %i, %j : index - } - } - } - return -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op - // CHECK: = transform.loop.get_parent_for - %1 = transform.loop.get_parent_for %0 { affine = true } : (!transform.any_op) -> !transform.op<"affine.for"> - %2 = transform.loop.get_parent_for %0 { num_loops = 2, affine = true } : (!transform.any_op) -> !transform.op<"affine.for"> - %3 = transform.loop.get_parent_for %0 { num_loops = 3, affine = true } : (!transform.any_op) -> !transform.op<"affine.for"> - transform.test_print_remark_at_operand %1, "third loop" : !transform.op<"affine.for"> - transform.test_print_remark_at_operand %2, "second loop" : !transform.op<"affine.for"> - 
transform.test_print_remark_at_operand %3, "first loop" : !transform.op<"affine.for"> - transform.yield - } -} - -// ----- - -func.func @get_parent_for_op_no_loop(%arg0: index, %arg1: index) { - // expected-note @below {{target op}} - arith.addi %arg0, %arg1 : index - return -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op - // expected-error @below {{could not find an 'affine.for' parent}} - %1 = transform.loop.get_parent_for %0 { affine = true } : (!transform.any_op) -> !transform.op<"affine.for"> - transform.yield - } -} - -// ----- - func.func @loop_unroll_op() { %c0 = arith.constant 0 : index %c42 = arith.constant 42 : index @@ -250,7 +154,7 @@ func.func @loop_unroll_op() { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.loop.get_parent_for %0 { affine = true } : (!transform.any_op) -> !transform.op<"affine.for"> + %1 = transform.get_parent_op %0 {op_name = "affine.for"} : (!transform.any_op) -> !transform.op<"affine.for"> transform.test_print_remark_at_operand %1, "affine for loop" : !transform.op<"affine.for"> transform.loop.unroll %1 { factor = 4, affine = true } : !transform.op<"affine.for"> transform.yield @@ -277,7 +181,7 @@ func.func @test_mixed_loops() { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.loop.get_parent_for %0 { num_loops = 1, affine = true } : (!transform.any_op) -> !transform.op<"affine.for"> + %1 = transform.get_parent_op %0 {op_name = "affine.for"} : (!transform.any_op) -> !transform.op<"affine.for"> transform.test_print_remark_at_operand %1, "affine for loop" : !transform.op<"affine.for"> transform.loop.unroll %1 { factor = 4 } : !transform.op<"affine.for"> transform.yield diff --git a/mlir/test/Dialect/Transform/test-interpreter.mlir b/mlir/test/Dialect/Transform/test-interpreter.mlir index 3891c16b411559..d9a11994eb9d90 100644 --- a/mlir/test/Dialect/Transform/test-interpreter.mlir +++ b/mlir/test/Dialect/Transform/test-interpreter.mlir @@ -116,6 +116,32 @@ transform.with_pdl_patterns { // ----- +func.func @test_get_nth_parent() { + "test.foo"() ({ + // expected-remark @below{{2nd parent}} + "test.foo"() ({ + "test.qux"() ({ + // expected-remark @below{{1st parent}} + "test.foo"() ({ + "test.bar"() : () -> () + }) : () -> () + }) : () -> () + }) : () -> () + }) : () -> () +} + +transform.sequence failures(propagate) { +^bb0(%arg0: !transform.any_op): + %f = transform.structured.match ops{["test.bar"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %parent = get_parent_op %f {nth_parent = 1, op_name = "test.foo"} : (!transform.any_op) -> !transform.any_op + test_print_remark_at_operand %parent, "1st parent" : !transform.any_op + %parent2 = get_parent_op %f {nth_parent = 2, op_name = "test.foo"} : (!transform.any_op) -> !transform.any_op + test_print_remark_at_operand %parent2, "2nd parent" : !transform.any_op + transform.yield +} + +// ----- + func.func @foo() { %0 = arith.constant 0 : i32 return @@ -355,7 +381,7 @@ 
transform.with_pdl_patterns { sequence %arg0 : !transform.any_op failures(propagate) { ^bb1(%arg1: !transform.any_op): %0 = transform.pdl_match @match_const in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {op_name = "scf.for"} : (!transform.any_op) -> !transform.any_op // expected-error @below {{only isolated-from-above ops can be alternative scopes}} alternatives %1 : !transform.any_op { ^bb2(%arg2: !transform.any_op): diff --git a/mlir/test/python/dialects/transform.py b/mlir/test/python/dialects/transform.py index 481d7745720101..d778172a607a36 100644 --- a/mlir/test/python/dialects/transform.py +++ b/mlir/test/python/dialects/transform.py @@ -162,13 +162,16 @@ def testGetParentOp(): ) with InsertionPoint(sequence.body): transform.GetParentOp( - transform.AnyOpType.get(), sequence.bodyTarget, isolated_from_above=True + transform.AnyOpType.get(), + sequence.bodyTarget, + isolated_from_above=True, + nth_parent=2, ) transform.YieldOp() # CHECK-LABEL: TEST: testGetParentOp # CHECK: transform.sequence # CHECK: ^{{.*}}(%[[ARG1:.+]]: !transform.any_op): - # CHECK: = get_parent_op %[[ARG1]] {isolated_from_above} + # CHECK: = get_parent_op %[[ARG1]] {isolated_from_above, nth_parent = 2 : i64} @run diff --git a/mlir/test/python/dialects/transform_loop_ext.py b/mlir/test/python/dialects/transform_loop_ext.py index daec6707d6743b..840e7a46e7ce09 100644 --- a/mlir/test/python/dialects/transform_loop_ext.py +++ b/mlir/test/python/dialects/transform_loop_ext.py @@ -16,21 +16,6 @@ def run(f): return f -@run -def getParentLoop(): - sequence = transform.SequenceOp( - transform.FailurePropagationMode.Propagate, [], pdl.OperationType.get() - ) - with InsertionPoint(sequence.body): - loop.GetParentForOp( - transform.OperationType.get("scf.for"), sequence.bodyTarget, num_loops=2 - ) - transform.YieldOp() - # CHECK-LABEL: TEST: getParentLoop - # CHECK: = transform.loop.get_parent_for % - # CHECK: num_loops = 2 - - @run def loopOutline(): sequence = transform.SequenceOp( From 03ec84a00ba4d540222ab39c407e02959058fbdd Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 31 Oct 2023 10:46:49 +0100 Subject: [PATCH 143/144] Revert "Add two time-trace scope variables." This reverts commit 33b85867e30e1adc2ff2173039c199b81c10f52b. This causes a large compile-time regression (about 1% for unoptimized builds). --- clang/docs/ReleaseNotes.rst | 8 ---- clang/lib/Parse/Parser.cpp | 14 +----- ...e-ParseDeclarationOrFunctionDefinition.cpp | 15 ------- clang/unittests/Support/TimeProfilerTest.cpp | 44 ++++++++----------- 4 files changed, 19 insertions(+), 62 deletions(-) delete mode 100644 clang/test/Driver/check-time-trace-ParseDeclarationOrFunctionDefinition.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c151bd9d234b51..bc28bb567f6932 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -427,14 +427,6 @@ Improvements to Clang's diagnostics (or, more commonly, ``NULL`` when the platform defines it as ``__null``) to be more consistent with GCC. -Improvements to Clang's time-trace ----------------------------------- -- Two time-trace scope variables are added. A time trace scope variable of - ``ParseDeclarationOrFunctionDefinition`` with the function's source location - is added to record the time spent parsing the function's declaration or - definition. 
Another time trace scope variable of ``ParseFunctionDefinition`` - is also added to record the name of the defined function. - Bug Fixes in This Version ------------------------- - Fixed an issue where a class template specialization whose declaration is diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index bef3a0dcb285ef..0f930248e77174 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -13,8 +13,8 @@ #include "clang/Parse/Parser.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" -#include "clang/AST/ASTLambda.h" #include "clang/AST/DeclTemplate.h" +#include "clang/AST/ASTLambda.h" #include "clang/Basic/FileManager.h" #include "clang/Parse/ParseDiagnostic.h" #include "clang/Parse/RAIIObjectsForParser.h" @@ -22,7 +22,6 @@ #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "llvm/Support/Path.h" -#include "llvm/Support/TimeProfiler.h" using namespace clang; @@ -1230,13 +1229,6 @@ Parser::DeclGroupPtrTy Parser::ParseDeclOrFunctionDefInternal( Parser::DeclGroupPtrTy Parser::ParseDeclarationOrFunctionDefinition( ParsedAttributes &Attrs, ParsedAttributes &DeclSpecAttrs, ParsingDeclSpec *DS, AccessSpecifier AS) { - // Add an enclosing time trace scope for a bunch of small scopes with - // "EvaluateAsConstExpr". - llvm::TimeTraceScope TimeScope( - "ParseDeclarationOrFunctionDefinition", - Tok.getLocation().printToString( - Actions.getASTContext().getSourceManager())); - if (DS) { return ParseDeclOrFunctionDefInternal(Attrs, DeclSpecAttrs, *DS, AS); } else { @@ -1267,10 +1259,6 @@ Parser::DeclGroupPtrTy Parser::ParseDeclarationOrFunctionDefinition( Decl *Parser::ParseFunctionDefinition(ParsingDeclarator &D, const ParsedTemplateInfo &TemplateInfo, LateParsedAttrList *LateParsedAttrs) { - llvm::TimeTraceScope TimeScope( - "ParseFunctionDefinition", - Actions.GetNameForDeclarator(D).getName().getAsString()); - // Poison SEH identifiers so they are flagged as illegal in function bodies. 
PoisonSEHIdentifiersRAIIObject PoisonSEHIdentifiers(*this, true); const DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo(); diff --git a/clang/test/Driver/check-time-trace-ParseDeclarationOrFunctionDefinition.cpp b/clang/test/Driver/check-time-trace-ParseDeclarationOrFunctionDefinition.cpp deleted file mode 100644 index f854cddadbfcc1..00000000000000 --- a/clang/test/Driver/check-time-trace-ParseDeclarationOrFunctionDefinition.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: %clangxx -S -ftime-trace -ftime-trace-granularity=0 -o %T/check-time-trace-ParseDeclarationOrFunctionDefinition %s -// RUN: cat %T/check-time-trace-ParseDeclarationOrFunctionDefinition.json \ -// RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ -// RUN: | FileCheck %s - -// CHECK-DAG: "name": "ParseDeclarationOrFunctionDefinition" -// CHECK-DAG: "detail": "{{.*}}check-time-trace-ParseDeclarationOrFunctionDefinition.cpp:15:1" -// CHECK-DAG: "name": "ParseFunctionDefinition" -// CHECK-DAG: "detail": "foo" -// CHECK-DAG: "name": "ParseFunctionDefinition" -// CHECK-DAG: "detail": "bar" - -template -void foo(T) {} -void bar() { foo(0); } diff --git a/clang/unittests/Support/TimeProfilerTest.cpp b/clang/unittests/Support/TimeProfilerTest.cpp index 97fdbb7232b135..a7ca2bf91e474e 100644 --- a/clang/unittests/Support/TimeProfilerTest.cpp +++ b/clang/unittests/Support/TimeProfilerTest.cpp @@ -177,29 +177,22 @@ constexpr int slow_init_list[] = {1, 1, 2, 3, 5, 8, 13, 21}; // 25th line std::string TraceGraph = buildTraceGraph(Json); ASSERT_TRUE(TraceGraph == R"( Frontend -| ParseDeclarationOrFunctionDefinition (test.cc:2:1) -| ParseDeclarationOrFunctionDefinition (test.cc:6:1) -| | ParseFunctionDefinition (slow_func) -| | | EvaluateAsRValue () -| | | EvaluateForOverflow () -| | | EvaluateForOverflow () -| | | EvaluateAsRValue () -| | | EvaluateForOverflow () -| | | isPotentialConstantExpr (slow_namespace::slow_func) -| | | EvaluateAsBooleanCondition () -| | | | EvaluateAsRValue () -| | | EvaluateAsBooleanCondition () -| | | | EvaluateAsRValue () -| ParseDeclarationOrFunctionDefinition (test.cc:16:1) -| | ParseFunctionDefinition (slow_test) -| | | EvaluateAsInitializer (slow_value) -| | | EvaluateAsConstantExpr () -| | | EvaluateAsConstantExpr () -| ParseDeclarationOrFunctionDefinition (test.cc:22:1) -| | EvaluateAsConstantExpr () -| | EvaluateAsRValue () -| ParseDeclarationOrFunctionDefinition (test.cc:25:1) -| | EvaluateAsInitializer (slow_init_list) +| EvaluateAsRValue () +| EvaluateForOverflow () +| EvaluateForOverflow () +| EvaluateAsRValue () +| EvaluateForOverflow () +| isPotentialConstantExpr (slow_namespace::slow_func) +| EvaluateAsBooleanCondition () +| | EvaluateAsRValue () +| EvaluateAsBooleanCondition () +| | EvaluateAsRValue () +| EvaluateAsInitializer (slow_value) +| EvaluateAsConstantExpr () +| EvaluateAsConstantExpr () +| EvaluateAsConstantExpr () +| EvaluateAsRValue () +| EvaluateAsInitializer (slow_init_list) | PerformPendingInstantiations )"); @@ -220,9 +213,8 @@ struct { std::string TraceGraph = buildTraceGraph(Json); ASSERT_TRUE(TraceGraph == R"( Frontend -| ParseDeclarationOrFunctionDefinition (test.c:2:1) -| | isIntegerConstantExpr () -| | EvaluateKnownConstIntCheckOverflow () +| isIntegerConstantExpr () +| EvaluateKnownConstIntCheckOverflow () | PerformPendingInstantiations )"); From 75b3c3d267bf49b9061db55c4527cfea9e62f77a Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 31 Oct 2023 09:51:30 +0000 Subject: [PATCH 144/144] 
[ARM] Disable UpperBound loop unrolling for MVE tail predicated loops. (#69709)

For MVE tail predicated loops, better code can be generated by keeping
the loop whole than to unroll to an upper bound, which requires the
expansion of active lane masks that can be difficult to generate good
code for. This patch disables UpperBound unrolling when we find an
active_lane_mask in the loop.
---
 .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 12 ++-
 .../LoopUnroll/ARM/mve-upperbound.ll          | 79 +++++++++++++++++++
 2 files changed, 88 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll

diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index e0d112c4a7eddb..1dee7a3ccb6d8d 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2430,9 +2430,15 @@ ARMTTIImpl::getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
 void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::UnrollingPreferences &UP,
                                          OptimizationRemarkEmitter *ORE) {
-  // Enable Upper bound unrolling universally, not dependant upon the conditions
-  // below.
-  UP.UpperBound = true;
+  // Enable Upper bound unrolling universally, providing that we do not see an
+  // active lane mask, which will be better kept as a loop to become tail
+  // predicated than to be conditionally unrolled.
+  UP.UpperBound =
+      !ST->hasMVEIntegerOps() || !any_of(*L->getHeader(), [](Instruction &I) {
+        return isa<IntrinsicInst>(I) &&
+               cast<IntrinsicInst>(I).getIntrinsicID() ==
+                   Intrinsic::get_active_lane_mask;
+      });
 
   // Only currently enable these preferences for M-Class cores.
   if (!ST->isMClass())
diff --git a/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll b/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll
new file mode 100644
index 00000000000000..2bb6f05b91b1ab
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=loop-unroll -S -mtriple thumbv8.1m.main-none-eabi -mattr=+mve %s | FileCheck %s
+
+; The vector loop here is better kept as a loop than conditionally unrolled,
+; letting it transform into a tail predicated loop.
+
+define void @unroll_upper(ptr noundef %pSrc, ptr nocapture noundef writeonly %pDst, i32 noundef %blockSize) {
+; CHECK-LABEL: @unroll_upper(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT23:%.*]] = icmp ult i32 [[BLOCKSIZE:%.*]], 16
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[BLOCKSIZE]], 15
+; CHECK-NEXT:    [[CMP6_NOT28:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT:    br i1 [[CMP6_NOT28]], label [[WHILE_END12:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i32 [[AND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i32 [[AND]], 1
+; CHECK-NEXT:    [[SCEVGEP32:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[PDST]], [[SCEVGEP32]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[PSRC]], [[SCEVGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    [[N_RND_UP:%.*]] = add nuw nsw i32 [[AND]], 7
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], 24
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_MEMCHECK]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP37:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP1]]
+; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[AND]])
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[NEXT_GEP37]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[WIDE_MASKED_LOAD]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+; CHECK-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP3]], ptr [[NEXT_GEP]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[WHILE_END12_LOOPEXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK:       while.end12.loopexit:
+; CHECK-NEXT:    br label [[WHILE_END12]]
+; CHECK:       while.end12:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp.not23 = icmp ult i32 %blockSize, 16
+  %and = and i32 %blockSize, 15
+  %cmp6.not28 = icmp eq i32 %and, 0
+  br i1 %cmp6.not28, label %while.end12, label %vector.memcheck
+
+vector.memcheck:                                  ; preds = %entry
+  %scevgep = getelementptr i8, ptr %pDst, i32 %and
+  %0 = shl nuw nsw i32 %and, 1
+  %scevgep32 = getelementptr i8, ptr %pSrc, i32 %0
+  %bound0 = icmp ult ptr %pDst, %scevgep32
+  %bound1 = icmp ult ptr %pSrc, %scevgep
+  %found.conflict = and i1 %bound0, %bound1
+  %n.rnd.up = add nuw nsw i32 %and, 7
+  %n.vec = and i32 %n.rnd.up, 24
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.memcheck
+  %index = phi i32 [ 0, %vector.memcheck ], [ %index.next, %vector.body ]
+  %next.gep = getelementptr i8, ptr %pDst, i32 %index
+  %1 = shl i32 %index, 1
+  %next.gep37 = getelementptr i8, ptr %pSrc, i32 %1
+  %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %and)
+  %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %next.gep37, i32 2, <8 x i1> %active.lane.mask, <8 x i16> poison)
+  %2 = lshr <8 x i16> %wide.masked.load, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %3 = trunc <8 x i16> %2 to <8 x i8>
+  call void @llvm.masked.store.v8i8.p0(<8 x i8> %3, ptr %next.gep, i32 1, <8 x i1> %active.lane.mask)
+  %index.next = add i32 %index, 8
+  %4 = icmp eq i32 %index.next, %n.vec
+  br i1 %4, label %while.end12, label %vector.body
+
+while.end12:                                      ; preds = %vector.body, %entry
+  ret void
+}
+
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
+declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr nocapture, i32 immarg, <8 x i1>, <8 x i16>)
+declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr nocapture, i32 immarg, <8 x i1>)

zW*Qp7OLS6A!`pGG0TdLE4)FIi(L~NGpjMHKM1ID42dY1cEx^Lt&xUlOMf-sjs2PJwlyWt^mQ&in`D# z=wIoZ508Qo;?5q~Ib??2iwGlPq1`#jV9nm<} z3VUga;dwBOZV{*;o_g6VIFGPg|_+NSWqdt;olp=Z?G9E3z`JJ}C5kfsqCul==y? z9Q1(wIPxQBKh~W_xy^vQ_#PhZ*c7!cDC&>rw`Wt?$WVS5>x=MY96dwuBD^}GsB;Sn z-iYAj&TSC9q;v+tN$*3UFS{k}r!2%JOt|J>pSYj0^>)e#M?kT4;Cupd2E`q`mFMzF zMHpk){>q!0&su-ymil0E)c{g!LxJu}W@FPLQp@S%dSztWr=M={74d6AiPE)nyABc$ae3&;Qr$F>@s`?-7*eJ{Af zCD;w3T5Roprr=H_8rz97#aoE8o-Fnk7vLae*FcxH8-`y&k07@|zBqUROOtUORYin8 zjT4nyl5KYluPh73SmWEMW;S#$vK`W}&oBTYwW+=JqFxweMr?~VEopy#0C%D`95f^v zinsPXE?x|$N!DSpqQ#n{U|6pw%-^9lei7z&wCs*Wuk)uT_8^Roiw(rPr=dBIxD8KC zA1i+o!q7)#!d{TEk*~%hY{mL&BW=X^B4*M=7KSWZ(18>l8UG*=Fo6hsk%iS_8u5Hc zsS^*q<{Qs?=i$3AyH*oBtrF=2^iwnOft=ViondJQLQT%lT69`%?C*^IC7~)e@ctG) zz)H6vaAx|=%1A|Cpbcu>LE-M;kWgVhTC(wL^FaMyEHz!4}g-PV8EmU~*e?9^=`h zIB@=fX021p2iN~}j1|{WJ}*N1S+QOfxbJL`J+jX&R6X@#dgWe+r{!V;3|1X^*L@wG zYNk&?SCUx8!Z7MrbVquGNp5QVQJ#~$QHY1O{)E_63F{W&hhEq49vF_(-p`9VA5a;E z13PHtr(Hv>ygI#qBoymETG<<%QI#X@Hx*1BPwk}Or*{i#U;)|k5!<9D+XtS)dlR9nR^3QuE;Vh?#u|uB&%^NG`-HF~Z8DwR zS?vF-J(%-x+$bCKI+VU(>O@RdP@0L@c)>h&B5p`gYBcjZNPg_a;y>t{6QkP4oSTi;N zjPyES*de@Vvgbi8Uq+>mw0ji2OzB8AhbrUUt5Dd>R5qD!Hr?K_XhWq9_xy30=*#de~N_$CRXV5lhViIcRTu+kC4*G3$O-!RQ`*Ov zF3^XkX~a7>pC2sS9{!2^lU?}{a^y!aF8_TA@(Xtv`N5*(#~}okAB-n2KO%eN=k_qB z^FNfIzQHo~E1L1}9RM;CWERk!cayi3Z3P(z7>WJl$7W1&lrR|~F($Dw2r4D)j}4g3 z2~aw*tAyKC0@WytLLkV*E3)f1F z>Lk4aM*I~Lkv9MNekSaM@ctD0{H7l-3Cv&ym{GQ>H}M&IMjBc_Av~3%OG+sLP{T( zA?WHOZ)RK{U4%PXA63C3`Z%Xh^idg%C#{cfCM4(scbGmGE73oXdx@uMsQWpNeMsyd z|4@G9rQ}D*ksrah{4)~d7w$6hgGI@Ir;;CxColi_Wcj&0^gru_^2g^7|G++uLN+4$ z5QLO{yb?hr`oUQKy(~JTU7w$6hgGI?dM#&Gx zlb3&NS^2s0548Q?%a6R2{0KSnBN&(eo&@=YyNvu`QS#%k1ewAdFrK{pg=OXEDdd4R z|9kn7my#bLM}7q3@=r^UU%1Q24;Cf=ZAyMHp1k~bmX%-Xzx6+n|0L`mG883Ti;!dg z5L8OoAIqD{O5piH7vXl5pymfv!J_Qne?Xq)<_=aue170sfxWF*3rRyj zUi=<*!hGN#+JEGw%-_C)#VNy+az$|$h6#s6G>Swc9E_Tz=%G`{bA(76CM$!Xj}Sd0Tm{s1}L4d675-8Hy4HA|#7ReBahp!fvehH~~s0 zc9w9vN}!Pei^OcO$m0fGM~D(CfH6S{c;$}TNqUshk!;*WB9)Eb)uMI^N=LI2mvXb(MPM{ zq7Oe9Pg)=Ej5?k^xK*q>{doFt&c2dQ^J97BYSCB6j$O8ia15`WLeSk`DTCkk(&>nA z(U>2`kNE-j>FH=r>@hIlUi8nU5ujp_4(qW!>RLf@{?BxKl`@d=85Z7v*n@T_RUXa3U$z^ynHKSkq$t z+e1UeV-7mFo8bKpZY_AXgX;<2=HM*BB@VV3=f_a8C3yJ)Zp@}28|1JgHihnQ7U%B= zX`zR-Fm^LY>$g#P-bUqs=^&Wqc$ntIdPw};AmLf5gruj(nhI$fNRvIJ$*~GTDgmj` zLn@5@_T8~#Tl3ckJimuRI}&>_8=gmJ1#D=lPPi7<|SX{AFMXAs6Fl~y`{agZ>zkJ4U@4Z@_`(mck$ z_6Ej*MXfZK@ppttxurRbza*SSILi2A!qm%3HO2=BQ$H&`g6ce0XAj{h;e(7{C7e!p zH{*?jGYFS3UPHJX;iZh16V4=zyWU9eNy6m`PiMS{Fuo29`5Dh6j9Vp;pYaUB*@Oo$ zzMC+r6Y?`2N4OH0xDMgzjB66EOBl;J4l2FEQ{yL($Q2D zP1~*fBg&87D%bOr?}-GEGgtD@b^NJ<^AtWZMQ|^L2Pi)|ouxa4uSEH~m7nvPN?-ZA zm49%LT+ex1rLX)Fkd6nT~99m!ReCk^dj+hD?gbpxxPxi7JjF`b}M|M`ii!Y z_Loyn{9NTH=Xd08p${zjPFR`daa$%Fk8)KaID0o5Vk&{J)A9-R{kA;U>X3ud3@w z5vQx`yOp1_L--1nKVA7r(X=D#dhSl)Pu8cS7aI8JZ(Ab?Ir!rwd=5Q0g+GV>a}vFR zqSHW-_(uaD{pC&}ld5R}{zd;(38g4Z0|NcgKtX?nQ-z-fV*1-pBQXAyr{(paBTt$} z0}=f>`Ov`2e`XGQG;q?NlfETT+xwpy911FYu`2(Me;&JuE(IzJJx9@3rAmHsH7*AY zT=X|b(P^Nfza0M6G#be0Z@Qw>z(s$K9%vw^zmi|1o@gMazx@gyIVR;<<&&H-q;dIL zR1%C4hA~Y@{>fkTuo^GA&=3{>%l%q#p7P_A|J%_1kENb(IwroOmz{nZay9Lv!D2t1 zcJO~&np3Ht(!ePVoYKH44V==zDGi*`z$p!!(!ePVoYKH44V==zDGi*`z$p!!(!ePV zoYKH44V==zDGi*`z$p!!(!ePVoYKH44V==zDGmJpse!w6oT)&Dj%$N(E?f>=A>14| zI`feOM`sG^!x5d%Nko-TVWJf(KL>su99<`$XrtjM9{CiH&T9;XBN~+_?agU~%J8 znJ6x$K?gnQ@FpGD#Mu!3(>s9|&V=*9S#Xp=eRyZUkt|){=mDIM~sD4(v= zH7)~`KxHDjjtfMq4M+J6fLj7r07sY{kVfXX<7~Vz2J0&o5ERe z&EU$zRfNlc%YrKhR~4=XTxB>PTm`sFaM^H~aMj>y!c~DY;Ho0sRJhu35x8=2S#TBL z%EM*D)r3ofONYyW3&Yidi^65VWx{=gv<|}MceKMpUgO@WKRMd5Tf6V3-0hAR)32A2w#3Fn8i;4poH-;=c;N_NVMn-&;rhY#8j(}je}peD5AGtk-f(^3&Kr@F7ec+A50?rz 
z0In}w9k_aML*c5!Md2F2T>Ad(J z1HRT7ap4C#FMi$XoSEl?vwHao{ctv%A>k5)OG1577jQaU3S1pHKU@ILhBM%Na3)+3 zt{+@QxXN%<;LeA;2(B;O1#tc0D!~Q%Ar4#$ToBF&XAH?L)Q1G}e3UQB5bgy&I5Hq8 zUr7)SWh*h^Qs8u4kM#EC*}d!MrS=|Cq74D9x39$N?JM-bh2Vm48NCB}>Ag{I@M`c= z;Op=M@YCRo-cw4#h(~hReSLWWIDcQ{1x#{T2>)8PByLU4b;9fPyreuFdN3^)xAd6XcJ5~N>(_$5fY1ZkHb?GmJ2g0xGJ zHq($c`$)S4X_p}F5~N##bW4zC3DPVcH}&$uO!H}o8|O}MM!2E(<0YYEo_?rgZOa96=y30DAj z23$|LbK%Z`>jpOnZXjHDxDIeW7~UXU3Y-q-hYP`_!dY+zTma68_F)al&AS*Sp4#9IyZeKFi6aMj`Rkalah zR&Y(>+QW5$YYvwS*9@*5Tt3|Ca2?^&dPNIU;h=FXn%;=KGJpPG<@{y^<=0hyE9G}p zem~`3t^8Y*f0y#_Q~qPhU#a{p%HOB_&y{~v`TpNj{>rbb{8q~Es{DS+zgqdXDE}_y z->3Y?l)qB>Ta>>~`JXHQsPg^4tNfK;SNW}!-&Ohjlz+AIZ&ChT%J0^>bGw}ST?Sn_ z_=+2Inl^9JtVwR;77bJgRV9RhH(YV!H95Bo8a90JO<-%$B)3T`t;gUSuOBq5T~3c4 zU2>YX!KGX+iH|Sl)W2r%h&;G!M_$=v;7vo?-EhM#Lr$lk#zTkQblsqVBbtDi)3QbL ztJ@B0-KJ@7^JZ7)wrO?6)mOA`(Y#G=tCm-`9XP1<6>SIRwjOvzvnv~Ddb(bxPC3i0 zGb}5bRk=>OenG9_StD?e*=VAfb#mwf>CzZFfKzV%hro0hwv)`g*qLj6PN z5mmKB=qE_b4&5%$?kqP>uQ*o6+2R0!F$knk+U-K^LuXVx3ulmn3R(g~35i&gNos*M zUe9(?Nn=FGgsChlH)f2ug<}{^pmS-{o1p1uxJ72Ba*lLO%VEHG0OkRSkrD0RD2kHu zx!H)K9NGPe8nL~K2zV8d;*^FIA9Q8o^jJw;52B(3oiRNvvG}CK<0va$S8;X4ll)f_ zOVfxaC^9FPfn}$fbL3Vv?#Qj$1DXfP>uFj4sx+?t6Pu#t)__;(cA}O833_Cuq%#aa@y$ayizT3lvCT=L=}vVv&=2TUitAt&cQn{oyjwXr7@& zHOYTfcW%#(%U~t;&*Kj;+)}+IblR%2Fl;>&tpV3v;@FYut<1FK%4GW)FI5?-SlYiG zI8Ru2PG$U4>zG*l_;|qWH`P%ZDYR@mOl;IAA&JwdIIA*KfTXTgwr_duV!RC|mJ0SY zaf*;yrmcCWE&l`UyY14qI1MhaS{ z_o)?tSd(}%94l!Y@aor9V!F52pM>mXI!^oomm_g(5DhruaQlk45Sd?kJ8~$oAta9E z+(3CWh%5eYCH~*b8P_)#rpzo_O-BNWB5>WtOPo0Vv2K#bi{#<1OfT|uxww^xdc{Q- zig&J)tK)c~*)%2&+J7y%r_0A>XHgSnX9boW`x7-+cI5wmlqsqD-1!W3roWUZp=)xP zkKb3FnR!xemQnn_A0C~qlVq~o0f`FFi=}vVB^Hs(oV;XpZ9&p#E-QLRmXj#F(>6KR z#8wlx3tUXG3W=3nW?XmYi%GSXq+v$_)%6l*SX_uQLY#Q;j9arZV(^8;MVbiWHUGz} zjkwf*ZQ4Cur%dDjSAzWY1SfF-F4N|nU4R4|#1hB*F~gu!akSZRoIdQCFve#&tgGa; zcw(21y6v)zu>YfhzU)x!oulwXE}{MZzptbIdOs%KU;k#j^p1^*9hCDsMk zRD4FpFwLnrY9Glp;WlDUxl9e5xZfmo)H0JI(Bm?ftqI4_B`+AgDo7NO3z|4Z;0q^g zaM;46b!wEB&i_hG=~W&vSV+&vnA~^bJ2**A&8dsy?kPHkREZcjNg|@#QJe{oGJlbT z+?g|JR!~OVf8Vc^_R81}ckbgcV`VZtxn}fN8fF=}DaBh>d?fLB!#*$x_GHItHj{IW?<8YiANC}1 zqvRwcPBx&4RmT#?8#(lxa3>~l+0F$^NtQBk>dj4z?Pxr`<88Xd)g5;24X-Jc^-Nsc zB@?amMX33lz^l0IRG<4)3%> zxtUWpBzCe`yo;Y$W0Y;&x^Sk6H&OXQ897hfsFUUVtGaXL=ZxYD8ir+!i+5&|w1bn- z4oRx`^o$$u@$ZJl4b$JO3I4sf$r}EfK8$^NobIwuY-Wr}g)?(E3KGXH}qx~prB zaJ*a3>gJqxlyg+CdU%;JG+zI#+%kfkxB({#lBC}FoPg7hTP3eNiE|(pkazLb;Knik zr6A!%Qx@^+1pW1Yx$hWPA(uSS67z+>ygTHzU}YsvXql`6?|nM2OwN>A)w6eB{kW^h z6F1SMsw{IRLgke#pSNHq*P^`Bnz)8p4~ZI6g{mxx$Bj+HqNvrVZD%K%DpYq5!tmh2`jC56K8R_i6<7vYslR}lT{I~YL1)n zug&}k>f0+tQa6XijaO>&vLZo;T(p0%BkDG;q{U>J&dav^+>B96mNv=0#53I#MDzLb&i$7u|AYoLH(eVi=H)c%N0tD8RD4hK~ANY1?yi_@NFWX$I6E` zAdp6{IyPU$LoEKy2!$yjy|jF)-X{`R0Odu~^nU4Z8TKPUd~VNa{$wyNcX>i zXTUbK4H!dZVzx0(&Ka1?mEPEVWZ&2%dqcA+`ktPm#d?VOi6&dds> z#$moeZ_f^A;>xRfP3T!~0lj80>;j5^RW$qxM0r&wWJmS3TsH#K>!bM5Y%uT8KOpAS z;YbCb#=#-FD10ZT0?@$cqHsa3!keW37-of^P72q!{$7*%8=@o6KqaKLOAl0s_qraK zflIsf!1zo}dqWRQM7j3}?M*#U1hiM@HusjU8RhY>f>~)9@-80hWh%3WqZNg#bji&B(#^ajK^y)nmoM~6@xFHG?c=oJV$ zkq;zBz88_-6}8N?{P)Dsd`sg}zNfSIz8;v#MDBDS=-lf*6r~&#?<43Y+o)(%GAbKY zjH*U8qq>oUPz|G|QOm483G!77*G8r0)If3cvklW<7pMjryOwfuB`ri1UbDTTg>!*w zJwoUlBQ@N_MJYxkdYXg$i&LWw3A6-bs8hLU3xL|S0D{d@L(Lpu4mDb%1+@aMLpCUs zUnP15KwauEp>AfhodbeR>`)sAm?MogU_jl~Ltm@MzxDaI0sl7S-$sE(pq>^u19j3^ zOR1e&+5BD~gKFOGC!_I$jy9zQmsPSlq8ge9muE%u5%`h&d+_<{q0Vu%$v91mb^|`D ze?pO72(qCOlzyikWJ7aQZw{hkXau?KzJ+6GTDpejC$3+|(EKcj4NZ4s)hf8vvpF=OB?^^}sTCzvzKUXtKZQfqTQ6_Pex_KXfgn zjyb@nOaII!bCLA=a3^SngZjC~>ES#VwK8h>y8xX&5p4cvqR{=l1i6Fe@^4d{I%_j6 
zJ*A#`*r;zbFd7<-jMMP1vC+gh-N=P&YBV#N8!e2MMk}MW(Z*J>5`1d8gf>UF~l5o*1ku5Z^f z{SOhM_u)rwv$e<~pmcqYUdi_uLK*t3YW~Ftv>~hNAD7{K5}=%ad{$@)0&U4^2Yq_^ z$g@D1LBF1!@iao^gA?>rEwmKDc4TjZ0lh+KIZ%6(P>P<(hK!n;7EIOAsI|x{fC~CP zy^Vh*LLIQNsp-!Kwa6NvEd4z_oy&11jTZh(GW?YNS+Izj|I)@H&}QL&w&hN?%6G@NQB{h&&i28eA0W#_m zcm%)sfhG9u8h9GN-2zMTTM$@*-|m6lNcil)1^7LOe|rSJMYt#bc4jMju9k9^xgwaC zk)x+{EO%K-r_2f|oy)g2Hybo(INe8Lo@1{n$5QrirQBlBbj}fT3q8lDjr(JA#_k!4QS|R1^2d#q#5;b75t7z!S5s}m27K?@8vfq;&~$Yd}G&0sj${`F)sGq%=D_KzjYa;Kuc#VN*9c zC}Mvk-pAs7VrWLbc^(ZaB^ig&qblnc8^J&XYV8$x9(yT-uqE}=sUBKPoX3$n6UOz^KdUdAtM*%f`h8_%&iMfyld;I}DojYX}eZ14+S;@UCDiRU|>7K zS8=}{6qpMBt9f9!hOb{7n2jsf^7X-ixd>m!cHsKJe1vZZY(?@zxTUb5>*MCNs>+ldaC|QPlhc@#SV}F+v-w!|+`19*WNLl@YS$ZQ@)Ysg0NmH8dC|LUY z^E0ErBJd5j`Os;p{-0e`wVI{z_rN*QGqNjvk0QxW2oo-a(k%E!`*7j&D z)bug7f%S6L{BKXwdyQe`-@zT>&Omi6b_y}XUy@w`Yt*s&8(`nanT(@nr8Rw^VTWpg zX}tblf`gILEr3`pQ-7MMb2@kFfo;fWlFn&Q)&sABrAXJzyY)oWk>3{ad{fH=3!E{xm&J?{kl&K3yiN_oB3yWoH|g8~-t9=ocXQS9sby zQ>VdPzulkyY$@t}nCA{K@7I@rc{4|xBM}RYa*o1IXKj0-`pqi-;(2Sy?E0vf;*W;ay9I}w^y-Kf{Z z{E((qPnu+>oO#tv&b;a=6U{k3u^Q@5tZt4^te!UO5f9C*Xny{TNj6y~Q;}D(aWPD< zg$2hhgq}5_9Zk&qfm4!zAlUmRf(!2!=@N7xlK3Iz-o4_El(RgU!`?BV2toG#nAu=yizW4b%=P z=Orj-EvK;~Lh04j-!g)%zj#W1?o$djHM(%0QV@L# zyE)h6-wl{I4>4{;=q6*RaWno6!@uFi2xFvi3;uO5ZV8SutLf%)GYx~qfyf?cYPjAO z3;o>)J#YSsVbZ@o9HxoQ3bQ-X4c>yWk!Cgu(v&x{56mNicjzgRy$Fre+aW~L?OXIN zdNxnBUodMSGR?KaR81?*j--t<^$f177o|3wx_Z$>w(+`JWzq)~b*HWbx^=ZWURS&j zSYwVrlhZrt<#@P!$vl@Lch)QMIK9@yLRi!D^@i|#`%#op{*q{fCXA!)ih4cn056*; zyUZ9vB-%QY#%_>tnP<4`O&;t@OdcIKn1KV}+Gqyya$xOD9?V`5Z?kw?OwHVCcE-r{ zZZxF^`kna*Qmj5Q9JXDQK3xxnX)kIU%bSkPPOepfw1kV$X4I|e>ALJj{g>8o++Me{ z4$`sl#qd>uVcKG`sPW%c{nkwG1v^Zd6I3s(#Aqk;RUco$mxfZ@UVX9|2_Y~heWu=; zOY)i|R9BTmAeAIcCE3MlNzc{8e%c%pDC^2Jz5nYbes|4Q56Ef;gOuZn_D~X9P!^qN2Owlco^uZWJ46E*k7X1(s!sBr^)! 
zw8^0$$r5@5$xby;4nKjKP*XKZ8QkKf}9o^BPsG=qgPw17Id$~dO z(?o0Iihn-N&7G@T@EKKz6_P967ztNJ@m`?UB5A>=QRkHz(ufl5kmjonR1iEfEnEwM zm9Pg$z8NNBH5d|?*%-TK_gmi3 zdj)AcU_5BdHy$z;7z>Su%}rYNa?i5*vP=f#1qcV8A=$NE`M^#?97V^k$7pC#TRS+#)nK)L6V}&S)iL@Itj%VZc zIN6O6gtyBA-=a+SVv@eZc*1xx z1Lh~jMJ3vzebuW+{VuXMNa@0Zz>x@R~?kh_&XF!+URgk!_eV4h~jDh-&D%;Gaef=Dwb}WiE3=u(~O8nFVGuib!*rFm0hM}=+8<9XQRyAO*@o+~^eC0{Jwqb3`d#{@ktXbCF#Y2z8=S&RkG z;onkYnem)?z`O;$`+eWi8k#m%xA#~X%Mluru*Ubf(6;kkBlkTmVj~iBq znBL=(n$H+n4Bt0t&(eI($Y$mbgn2o2$8sNm)-R{T`Tk^n+(Say~({q ztWZV-sw>p>ENm>2{n#r4Wb3k_F3>d~3bFyATR(3}UG5X_ZSnSt_l|gbQP3AryQ_@V z#v0?L%rEIRiCXzJ$o3Gu*;CWC@G1t>J@n3);H_kcA59PHseEMMD>fE-NSzrFNQVZP z`n6C~6;&WnvBIO3!8ZuKigrbZMjGgpB@T^D#>MYjV{HvC{tHcpVVM%d73gOSF465J zA6W%W&ob#Pqj=>Y{Tdcco^W8nXLJo%bfyExOAI^3_c|`82M6F+*Q-?xUXFla4Ll=s zDFP;5B#GQOJ)fRq@kg7jn#5bhvNpo^TeO6-Iimn)l)~X4XBX1!fd5uI#eaMJ1c`xH zmKy0s2L9oQg;72wH3QPr;1XDR zW_|qV?fq7B_`dI{+ni}yO$oHck5NwdwS;exb@Xk__MvfWeySMX#`YTX*))_G<%KVh z$d^KTa`}SHS3&qfiF~OZzBJA^OZdWxd=XE+QRb^8eCdgN86LiJ%vW9bG86f@6}jcB zznuibvV+6Gs`(0R|8oe`<==YzTc3X$@NYx@ zZRCF*Jf~@aa(0_QrghjTZ#E{uD*9f;^(NGdeCGxIdjU?@0u}78TEo%Ou%EysZrD@M zu=kvab_8DjyLNPjrcL99ZEB>+#^8{)jI9Puuh+!t4d1YMP}lrE<4(V2+-mr3^LAXH z6u1b#KJKWqbSp*gWd-#Ce(O$L#Y)V!ni~N-K&Edpb8=HP7wBeSJgx<)f(RjA$XXAO zYHh-A8pjW7{uF;lf6(9FX8TygDMUH1CsYdQoB|M1$lw&paSEB7LV3*y*%gi<#k&JK zx?C2`b+Lerf;*e}D)Mh7t0kzFIZhQVWLNDESj^c~<5p<1efH1#CYj1{Je)tn2_!ny>Fr==D$B6h1!klM5`^&QQ;(4eYs&82R`zis)q zT`G3BwD#70_&r1ONBx~cc84Q~aZi9Eo_Qff9%4}2ISUEoaios?+lhZWTW^A|3+J8> z6?FXwF{ZO5-B^;grp@(H!1dAIj0E-byIZv2JR8MKx6h%Oz`YR@7j(0NFUnDSaMTyg z4AOW{WPC1XaGqxOBK8@~ZkTlyd+$W{K43qe+52kt1;jp+*-f*RV(*v8-XH82GW$iE zkzrrF9}#Ay%|LN3pNE@U`mm$*S9O*xrJ%FOj<|tS6qA0Uelq(aX3`CTx zILaW+zM7)k$5F1~D2r8;YZIdkMwII~%Jmq7>>Dbeb={vvHed+tvYFrbEH(x=rU1bh zXj~&VL1jZZ*3F1N%(@l)!`V)ZU^{W6=Fjx!8RhMfH1V4gphIJHYg98aIZM$8j6$r@ z9RF5?{;S!y{TIZ!%y&EU)g``OV?e!w`R>&0LY)$u$4p~&=E~*N#$jGNV7$(J6DW+6 z$@X1*Q}zMoo+RAuIL*n#Q6$`VgRz)z>^{hhQ-pDhm4QM|CB|vOcn|j1D;Qac#{li= z`Z#RN%#ZZMuMc}9DZX(KyRoUPZQs3F`2AsC<=E*wg+E9q4t7vWmau7WZU z^*-MDxBJ+P?$@b<+tebt^5*Ck-6fwZ-aOqJ1N4A+4~jQmyobbFpj%_XvQW^&I;XNo zxAx%TBZ3y|);^#|b?aStxEt%Q=CALs?r&xP82u8tAC8tAjr;pi+Fh`RO1Tv38q}hz zF|+$gh?9h<&^bYki}!Psy5EZ#cAsb{a( zt0T^$@NDPY@?+{>X8*P#m0;CLL@OW}IcF zVfj_WX<+Z*vMmm0LG|h8&m3nb#d%HQ>=NeJ#lsk6G_>E~d>##-gE$%HZyaY2#d%ZW z>=ov>#M_5BjqJBM&ST*_5vQE_C&$5U2z|gi66b(0zboE*h~pl2wBN_wGi-@Ro=4nF z`%gV3Kt1dO?xg6qW*T*|4|SS1+Y8Y4sgE6$j)FeM-J`s{NKcKtfCMVokLf9CFCz4j zbjk@xk79hRpGFDXiklxAb7tAk>LKR*gl~n~4(F$O%{V9Joo%ns(>UADq>~}r2zBO) zsC((nmAE&jQ49Z^56s$WI730bR6#UxrhXyRbVXGV)W}PSTiLE)L|LLL_9op$uS%QC^S}~I9*{< zdCbP~6F_sgU&r7u&ahsFX2u(6OcM-l+7k^JevYz)`6mhgXy%_R{5YPX54c9*QzuqTGv3bR5(*eCVZ5gfGCvxvKf7U+1FCB)(a~H(NYR zdW>fF{YD7vPlhLey_&g1vCkp)xxzP3yayn_gN7f>Pcd_K^BKjAdtds1hlFc^cngts zbNgX~=B`hN4nQBNS(`Fh45ZW60ofzK!t=$EabhW0^$8Js~nVShmZ>=G!>S$TK?H`;FR|VZ9L9j}#l2b?hpEeF(i{kYxC{7*zrk zI$)6D2%Ldik>U4%8=0ruX=w)$lEbKGecSSV;6fi($$^gn-!m?u?$^K$20jIR-=Hly zvzc89h5W#lu{p~ zQsV{}8FAdoB5xdajJw(6&G`tg3izXRgNm)_=RulcPc*rQ-en@a7<5cr0Fs@<7X~Os>Gu|VN(}@xHo$MLrM__!37f7d>4O3+eI#Z(1 z`b(pOtoAJPP4w-xykMDa_CpkNisHRbcxho2c?<$HHm9Xpfh7ptFLxYT%(VJ|4@>zDrCEW7fQ$H^hWSn!%o438AEA^Nn=&PqT6j$Id>q-GYcJu9)~C%x z+$SU>Usy7F(wos!-i)43%IF!&=vm3=Imu|LWV8&5U(VT-usqL8HW@0L72a%K@Mg0z zDVrB5n^ls{YRP7eWb+cTS%7^l_hwU)l+6apW}{@YNwRrGve^u^ zooDp2x0qX@;f;Y;(P*~vj;dKHni<#$xQ%b`m{p>gsDSO1(yJ182a34Uq(#Cej`y0x zYoOxol6a?cyw@q-Zi)8>;_WdXLw>JtZ-0|dV48t+YC?NWYh*dPVFUhsrgbaAZ%efO zc-pgfu%W+Cu$ghb(bs;*9Ev112gZQq0N)KVbIY57I|1M2lYIoS+I)|adf((lG@e)8 z3i^kV;z6W%L8zzE&**PlXn$npK$I0Od%Cm3k 
z#~D5fJeLj*E%i$~S{Y1B(?2D2C^fAjql!S8IGg9oa$$Kon^zt1f=ryv%czP_Ke{O! zAeLq6CdI(nJci2!(%C%D?0LQilKNFmMk|<_dN?z^2BP*)Ju11rAdzqmk8>4Bxu#LB zD>;cwoWt``)&lvaaI_Zii|L0+fZu~r65vJtWK`x8l9C`wDV^I9cY z?}{*5EeJQQS!EZe9?M9pgNSR=M^GM?9_CEeNQpj|Oa!@^T$1@?n00h1moY#pe<{5@ z#SiH@4A+VpB6?YXwNu4ScI0wFBUIux(7oH7zwyF>ey) zPJkriD?%;MrI~I{=kB#7oqN{Sbhf11#M_>(*{`NwZsIFy9Ce4J)K^dEQtT9XvF>LX zUrQfO*A^L2D2$d&O*N5+97BMG%jn&5wQKWFxOI_-m{UZK}!xLb;St)9hs zzae=K)`JZ9@aA;tb$T$3TI`$YBPs6fdW3t+o6MZ)v|E8x7Va%qQIqACFeqM_y;7KA zNRX=jt@J^Z;Akb=TOQd2xw2jD$aW1U#|qsi3VmC={pp&~&;HV!jx@Ihnq<=rWi~H8 zlzI6|%uCO#D4UnB#k`zH=4HS)Vp70DrtmOXnXkpX91-*KEiCv&Fku%Pm%xa9XC6T= z+agntOB1tEwiTF$(Dx>OG;@GnIWQfeA0)pku-QHX@JE4H0?qfM||Z+V@efGG;ev1dHX$@Xx{#aCYrZDqR2LG z-UPZ8*3mU@ok+uM&;E>}mB!86F@dgmE0pR~=54Gr9%bG-JLYYix0wi}T-m&h7h~d> zH-TKkY~Ch#%-aNyc@yL|kS>mS%ctmU&?ZXO%Dmkr$T4qS9rHHHYu+YFiIf!*b*)Zm!6?yh;e+hg7Yxe1@`Bzz7h%-y5dYu<`I=1q_rzlRgQrxc6~ z*c7jMn<@o!%-b}tdArAJ-lhw)W8UudnztEZ-e!tX1JVQ?^Y)Fju+-(DU4&@7NrlN1Re9X$RnE|SGGQmZ0B=wY~CIbg)SEF zQRww|Hapwd?3`}4Rc7Z8ui5$2Yj%!#%}${Y>oLt#W@oGqW@ns_%?{#`*%|NS_0$9( zmtmq$GcJYMyUf1JN2ftvjf}@;e6BemyJ}z}LX&(n=9<^S?8AIUXQViEMSq3 z#!M5>U$V)&+s8LOhOx;TP)vDGk-R7SIPa;F_cY1-9^`#FO!{xytP?#x^N2+4aqVI@m<}J!J&lVkx zl5*ZBMZ8~%IEP9-S1!$xOY_8gfG7`&1oK6Lhaka!i~;rnzGJ^LvI$wYG=Io80~-)p zD7k)}ZS&T}!*Ub)aJI=?7XoSPLgS5zMLx}bgwuJA(`jY?qS9IHP3KW>Is)VAJce{0 z=X7>)I<3t=RXR(&={(_0M_@diCy~xmoX+cx?H2;;pZuSely@>Nhnh9I8($}79 zFjmpxeZ3bYPjz#26{WFSBCkQQU-Eqc<~_VpU+e2Z5r0$>UzUjK7UnvNxLzWbAmRq! zWH7(US!@)}+LdTIwTT#C5ys76+~T8k*Is7aDvT|eaT_si7sgk?xWhLGjBhdHPGRi9 zjIR;nE@6BfjJtg+!MKkZ-w?)2m~js=zA21*!T6Ss)@yGw<33@$y)vCHc$*ma3*$Rr zJm7l>jQg4KU16M4*|etAz$TcF?~T3hL$nVh#)nY)K!11RD*K>s65_oRm=4B|q`z!|Z1)_~bvJ$HIBvR%C7v=}Q!KXZQ!Tcx z)2NvD$fdjG(sU{Gy%rn$8J1=YvS(TuDE-0kPzrMI{P7up1Arr`Zls# zV4+QEdXedS2WX*%Z4oVk7tr|L1$@}5t%x-6FXBw}X{L|RBZBS`wAk_y=YtlnV>{>z zQmtzccL9}u5&b-BS=ZyjW0rLjyvK3E|9WGB{{~~Me~5pOy~OH_=$~-pGxfpatJKq>;KW)8%2%q{Ef%1$6%dY7q6@0Wa`>aLhCG`yzf_ks# zEWTs3)Z$jWj1;(B^!~i)aRv1Ff^{pj@fmAlrD)>=(Z-8jZLH!GVf1FOO~_%jWnEJT zD|PW+5^t?|FN?PhGJ|D3Fv5>I9*7R8)a3b)rx^DXKu1dY4c;E2==3 zI!UNq6jh*0oh;O@iYm~h774YRq6&1WcMBCeLp-e$=u(S?+Felvy3{E`JzG%)y40yc z?Ww2&UFtNUo~x(=UFtnT?W3pyiE8y}h_h*a>z+|K3gqY0Ff#Y?+rT(4{W- zQU$uy=Y{H5DGPL|D}-t*sz8_ef=~mBD$u2_6l#j13UsM23N@&x0$u7Vp@tMypi5mX z)KoUyDOD5^l0 zS|ZePiYm~hZV+mwq6&1W8--e4Q3blxOoOE2y{vUFv3`W+|#bm%2r$*@`OA zrEc|71-jI2LanIc3UsO4g<45b1-jH%g<4rr1-jH7Lam~x0$u7(p;lE?fiCqmp;l8= zfi88IP^&AdK$rTuP;(SjpiA8?)EbH^(51d1)S8Ma(53DXYAr<-=u+PlYHdXo=u-Cz zwT_|+bg6F%wXUKHbgBD}YQa|xh1-jHvy;OlN^)oM3piBMSOBLu+zwlB8y40ALD$u2t3bmQihCrA4 zrBItIsz8@|$V(OIQor(21-jI)y;OlN^&2l$pi4dMr3!SZM}*o!m06%m{nkqr=u*G) zQU$uy@4ZxkF7*d5RiI1#(MuKRQjdD60$u7)UaCNs`m>iR(53$3r3!SZzj~I^Scpi7wX32Ebg9n@wVR>}bg9n?wLnn?y40m!sz8^zOsL&eT!Aii zxtA)?r9LmzvsGMyE_H>MD$u3AAk=eIT!AiirI#wurM~E;3UsNfyi|cMb+u4?sC)&w z)HOowsi*>7>Pud#K$p5!sOPG<0$u9MLOoAW1-jIALhYrf0$u8Qq4ri(fiAT~sC^Vw zpiAB0r3!SZ8@*J4E_IVo&sQl6bg8chwXdQIbg7$#dV!(}bg5f}+D}mhy40;g?XRc; zUFtTWUZ|)7UFvqBUZki3UFxesy;xBNy3`#){r~8C3wSN6?)`iA85OEPt!|5}K&`$RSAkl63+kz1%s{Pfhk9D50=4=!)YC&1sMU9%jt*6zR^Nqs zMyLX{`d(ZGYW01nXNLO%SX=XkYIPUXvqKfA z)eoSK4OO64Ka8tDt$q|&fm+=S^_(zfpjJPQt3a)O0(D%tFHozWLOnNBfm;0x>Up6G z)avI@&kt3gR=mhYV{|mSB5H3 ztNWo|6{q8Z&)!(4r5UN0} z{torVPz7rB52!bVDp0F`LcKXufm;0w>iAFvYV{D*TS676)xV)m2vwj~4?~?8sz9y& z19eiU0=4=t)XAX=)arjwZw*zTR>vpgOgK`3TD=A8lyG05Rwu+&pjIbBof_^7)aoRt z(?S)f)yYtA3ss<2Z-sh$r~fLb_sMYy#6{yt(Q11?725NO-Tm@?N9;oxfeSunC z1a(2E0=0TC)Pb;=~)anCJ?+aC+R#!q@9I8OAu7bKGRDoJu4RvX#0=4=e)cZpfsMUv{E(=wl zRv(7CJXC>NT?2JRr~Z?#Ugep+0uf&F9ZjGxzt!|5}K&`$BbyFBKP^)i2eJ)giTHOxy`A`LF^=+t|Llvmicj78g 
ztMA5DpjO|5`a&2pP^<69RiIXP#8seHcS79~#thWzuDA-+>IYC?4EF_U^+TvHg(^_1 zAH`LmR(C^vIoubh)sN#UP^+IneI?u%sMSxQz8b1Pt$r3)fm;1Mt^&3CMO+1H^~<;l z)aqAp6{yv(<0??A-^5j*R`Tm@=%Ut9%h z^~bmh)ap-h6{ywyaTTc51926o)t}=kP^$;yDp0Gx#8seHe~qg^t^O8Qfm;1Nt^&3C zM_dJJ_0PBp)aqYx6{yuiaTTc5zvC*9)x*2mYt&Ycw6W<6`pjK~*t3a(zh^s)YPK>KStxk%oK&?)Wt3a*Z3UzCkXP{Q6 z#8seHr^Z#FR;NMT7RC(J>TPiqsMXt{z7_5Z)ao5^6{yweaTTc58Bn)}F$1+aGp+)) zIxDUMwR$Jix5Jo$TAdB`olpg8^)9IIhAL32bK)vct8<~g7w!wx>O83LhbmC3cgIzr zR_8Vmim)at^x3e@U7aTTc5MNoH!=>oNSFVtP33e@U-aTTc5#c>s=)g^Hi zsMV!$6{yww<0??A%i=0ftIOjmP^&BADp0ErK>Z-}22iUj<0??AtDt@u?hDlFYN#KD zDp0Er##NwJAA-6&+!v_ThoOEFsz9x-iK{@Zu7&z(xGzwvkHl4=Rv(4>S-3AytB*nb zJXC>NeH`i+p$gRM6LA%&)pbz64EF_U^+~8-g(^_1PeJ`URDoK38tONp3e@U)sCz;c zsMTknejBPlt!{w&eW(Jpx-qT-wfbya1!{E@)V*QMK&?I(SAkl6KCS|_x;d@_wfX|o zAHsBjTHOM5U#J4L`eIxKYV{?kKZg4Pwfb^g1#0ybs6U1K0=4=o)cv6f)aq+-6{ywM zp&khL1#0yTsDFkkP^(*^9tu^UR<}X@J5+&MeG}^8Pz7rBEvWy5Dp0H2q1sI_Hv?+* z?YIim>N{~2sMU9&I$_K}t-cpmfm(e(t^&2X1F9Rw4Akn*xC+$juDA-+>IYE$FlL}u zKa8tDt$q|&fm+=iSAkmn7-}+17pT=w;wn(9pTp$gRM*HD{>Dp0H6Ky4AKK&|e9+A>suTKyJkt55}M^*gAoLlvmi z@1eF0RiIY)LhTT$K&}1&wR5NfwYo2^0=4>MTm@?Nr??8#>i)P2WYxKsPQ(uQ&Qewc6gB}Du1bP^>2DBFR21bP|t3g}hPYoOObZ-BOfwt?ORy#?A1dK>f( z=v~lzp!Y#LKs!OZKp%iU1bqbB4f+`L3FuSMXQ0nPUx2;@eFgd&^bKea=v&ZtpzlF@ zK|g@@fqn%21lkWe0Qwnp5cCV^SI}>u-$8$X{sjF6It2O~bQtsx=wHx(pz%qxza-ip zXd-A5Xfo(l&=k;A&@|9(pxZ%rfTn|HfM$Ybf$jv&2Hgdk1DXq(2f7AJ3+fZAAmjteFWMK`WW;H=u^;VpwB^HfW8EM1^OEF4QLPO zThMo)??HP(KY;duegypl+7CJa`WbW(^b6=$&~KpML4Sb$1pNg%1o|6v81xV5U(kP` z@hP;w6xttXB4`q5GU!&&6wp-AG|+9J+d+4Lri17eul&p7nV?yqJ3+HScY)@B=7Q#d z?gq^VEdVV9-2++#x)*dGh+cNVzujH}S_--!v<$Qyv;y=1XeDSBXf^0T&_keyL2E#3 zL63kQ1w96O9P|We9q38WQ=q3o>p{f)Zpc$ZBDd>LC zGSG6+3eW?fm7rCi)u0DK4}l&AtpTkCJpy_Z^cd)I&=a6_peI33fu07f2R#GY0NMz8 z7PJZU9O!w_X3z_uEua@cFM(bLy#jg_^cv`O&>NtwplzTxL2rS!gWd+c19}(q9_W40 z4$w}}F3<;{4?!P+c7r|!eFFLv^cm=L&=;UDL0^Hs27LqC1Ns*99q4<|UJ$)=hKGWE zpdUd$f%bzAfPMxY1pNZ~74#eEchDc8KS6(i4uSp#9R~da`WN&cXnY3kFN5|6nh2T% znhd%XGzByjG!1kc=yuQ@py{9)pqZdqpgTdcL3e@XfaZecf$j#)2Q2_C1lk9tN!etpz;-dKB~+=yA{!pmm@pK~I66 z2CWA@1KI%E2znN@3G^K3dC+Fi3!p8a7eOz9UIx7adKL5<=ylK=psk>7pf^EpfwqI* z2E7A%7xW(Jeb5fjPS7sU2cQo@AAxp*J_daP`V{mT=yT8)pf5pRfxZTP1KI=n7W5tH zd(d9c51@UZA3;BX_Ja<9eg+)`{Q~+G^c(1R&>x^bL4Sb`f&K;^2K@v27xW*9UMt4q z^etIvf1rt=NubG~TR~GmQ$f=}w}Ea49Ztxlq&u*2IyTM#%>>N?-3givx(hT1G#4}v zbT?=|XaQ&;=pN7_(7mAhK#M_3KubaQgO-7ogI0hZ0IdYA0<8u;2zm(gFlY^EE$9)@ zqoBt?kAt27tphy?dJ6P3Xg%l|&<4;((6gXTpyxo(gEoU+0Br%i2zm+hGUyf1tDx6F zuY=wIZ3S%uy$N~?v>o&|=pE3zp!Y!UgLZ&+f_8yE0DTDh2(%mYG3XP}r=ZV3pM$;t zeF^#s^fl-k&>qmYpzlE6gZ6@c0PO?)2>J=MA9MipGw2}b7tpVu-$1{E{s8?6`U`Xj z^f%}*=pWF(p#MPQv(XH*(F{QoL6bm}LAQdYfTn_`fo=oc4!Q$09W(Nmv=poR< zpf#YiphrNDf*u1s4tfH#4)i4GDbUlP^`K`!8$cUD&w@6Ao&!A(+6;ODv<37c=q1q0 zpjSYzf?fl?4tfK$6|^ne39d1WH?i?8Y}^id8}tt7UC?`=_dz>AJ3+fZAAmjteFWMK z`WW;H=u^;VpwB^HfW8EM1^OEF4QLPOThMo)??HP(KY;duegypl+7CJa`WbXETV9^_ zOSY3Chx|$(FT5hR#Jw`F*u2WT+PuoW#vYY-t#znT(t99hEWLN}V4@M(t<0ghu3_J2 zCKF8vSLM)0It+U;edd*Mbq>9W*|1ldju<~EFDbXzm_>pQ0Ut4o#rRg(m{2A=^7QLR)ppUwbrgyCV;nO$y z=uNeTF_>Q4Xj+zWB4r@3>rp7*m{TJyM-8X1QrVU@mNq8so}5Ga4uGL9cSv&Ri|72t z$CT{j@kb-nhrCb1uzVKMA_OAPS4rqw!1TRg)3s95DMJ}|I-AHl5r4Go+ausxKO$vB z`U#SNma;_J4@p2v@)=?EGb8~m$!o~fK}Z5Dd7~*ax!xEG^JG0=)(d34P}YlNy;#;u zWW6-O_GR8U3d;@Ob4vV#eIW4Z`;}G&J1a|^RmskBkL99Q8CEqrI9q&DhhO39ii7Fp zynP7x_za_iA;;%oIzE5Xr{zq8Kj<~?WH~;HU;pgL?j{);s$aR&o0jw$8QZ$UsGv9F zawIFGNVZWV&!;$&@Ne)eiii&ree3-HeC^y!@n?RMZy%Uh^Ddu*1| zg|?XdN?&_&eUB{#67>Ff{CT}-X*HriHA*?`7`qNbW6SGDOL@_%ORv851`xUYa%++j zJt@ncgo#97TKD~ODs_;krZ7phk?N>~X$dnDW=WWxuu07^f^5Ho^{U!T5q6T2w((}= 
zN=e&BC4DVc)OJx(+t(DeLsZm`QBmpBoD^eJR!~$}yIztK^JW*>b!eq??&`v16|Hp1 zZJ|=B2DJ8u;%c7TgyXPn+R!z3bt+}j%2q|0HqvkK!f!+ywkav=n<`PCjNej;LfW`p zB}!>Q-}nr2{I=F6HHmf_Y9A(Fx3KTXOCF+?mN_@8!1N?@aw>iAcOkAeIraH< z%T6Z>o<~WhXXL1bLr1oV^CBLoh%aGp}q$J3%=h6=JwNr}I zp5-|yq8Ft&_8co6Xv&$O+Ifi@Y)m^OGQ=;bL@q71po}l0jIU4`bAz%VFL{EM%N4l0 zRN#bai3;pT6*!10a56fqd9^EWDmtp^nxmSDj%rqPRP!bCVFeb5jww|`uD}ISfm@*h z7fJD#ZrM=qXL&m1um5eTqYH`Tn=i5RMbidt0b(Ju#SXvr2^M8%zXN4 zeEo)0^d1f$$p&;Z4As%|{-Zh~V3yQ1q(wE94AIB6`;kebScV!)hPqh(O%z2FNua5O z%_M9tVG9XcCW^@otrF!Fwl=Ib{MPiI(pzMk^&M$LIgzt-BWL5g<6&=+7dbn>#@Pjt zvkN02iQZi%ZThtTW5V34Nu24b5&iOpT%fHjWB-R_y{d zi3-@Xrhv_&0yd8dIHq<11#|(uZj_s|xyLvA6ni}g#_}gg%>JHqDd%uu%>j z2Zp?JrL5=4K|Mdg4fg`sa-nRwNSgA+vVKW|hs8^!l$S}lFE@hnB!1RWUDLEnXf@-(#e15?X*b7h-V_7K^aBS0%Pl3inY8b(3dWi`BoB!XQau2~wDd6qd$Q zxF0D@L<-9g*>WsaVDSJJD^;Q;HR@HALcQb|Yqi=-DGZhr9z+ULk-|gq6dpzjQ<1_N zM79=-N3eJli^o)A7NtPnRuAeYPq&_s6oyC&>yQF{QXf`q%n@!B7X0w_vjGJwOw&_7ZyY#4_eR^M}Lz+&HI?}&R zW@qai^)n@pNnSVWBS$8A-J>@BZfug*BWlz=YZ`U0s8RQh8Z~_-onnk8d7!9SOYD8D z-K8>FexHg!)7xl?-1uO{~XQS1Yv*y*e5wPFW_vD@V|JsW84PPvt|GRV53(2%yh z6Vt$5m=u0MrvP2dt}@f+6NDQnJ~_)U$c;~w);d8b9DN3T=%|>wmj9pA1It)Kd8v>g zfWH@HsKR`l7jPa`u`i{hQ%Qod9O*v=oKnXSPuOBt0jJBSJycb-Oydnzo#*k{0evu@ zW~4mjba_JRRQfibC%8Nxbub_0DV8UR)U^_Pjv?UCj8T#V@r2RtLt=uvX;n}8CDb#R z{zBT(_wS?7^A}SX7*^_4IxLm;my}+*9~qn5vx2V)Oq0Hj$t!fmK(yC$_7Y@$$M8cc z@f|}60eLmY-V>2eCo|qpM3zHe#tin+nr+j!*OF`@pNezn!HeD?r>WB!ix@_DPn9~b13f(Qi2z69awM*^33>K-Uoj|>H0g73Y+WosUyBnDCCvYF6Ti~tk}EiR`WB|d zUm`i7Z^iK*0y=)*`7I+p2_$bxE^283@0?WCLS<5L`cRz55hqRh4yHs}&Y!__!bk;l zq^T&PD)ZwH2TDWRJ!^SHt-MAUJZS!G)5x{8yl%Cw`tmH z!r#8Ek{z2$H1usaNxqp7eHu>6)%3$&boYAW@0$I!M@2!cK;nZRVJ{ zW*(mlnT2MN8PHXYVzb07O_jNAGvz@)WmIuy`C1E2ilrBvN-bn(eeMPM>qL#u;%^|p z&>Ng&q#PqGe2SVQ>JeQ^bug%_#`Fh zE~J%&T#q}Y%3Va@^Py0u=6gG68AzCvI8sI;={4qSJXU%)X&a;6GxP-*LtRznen?AP zR=dU*ke*82kaXETM>01tM-=I%)TFc|nMY1C)b&U*UlJ=Y_(BpN45yGUkhPMG7V+gy zwWyv%Q7rM@>)U*_##BoS?f$f4srwy~rOQyulI}p-S}Ivwo^%HjlyT!zD=bO7T#CD- zz@tkXMuj(mBCVA5D#=H+gmnz(c<$fZxZhHE0{h+Ho@LY5jf`CX%UqXNMslt5_jB1ia{r#B{@uMNq6}5;)C!`!B=zt1GFr<|QD2#6i<3A@N$TH)P8Q<+ zU6MJ4E55(4uD~eP{Qdcv$7r_#|97N!s_a4iyQC+?=_NV6(|GqJH7J+U6ykJ^;Cssc z>D<5jhl7A~eLDB=$<>HNfFsFpB!$UM$a4|zE2)f@G~db+I75K?ckxiM|6gwM2a5ho z?%#dt-zEMtB`13jzkrUvB>7uLd{QcTOS5~=;{M&I{#|H{6rB3^6h~adBT?o7dwM_7ILQ(9 z?`hV~a)D^nzk8q2eiur3k@pp?Uo2Jc5(zIg zoXfatZ!7y;E-_HoF7aIYzwmh^wS>QP(cX zT+Lm(PhGpvHIh;4+9lCzB~j|yg|6eS-KVZS$rbT>?%I9o+NJhi&z8xkHVSbbr1sb4 znkYxQUQH%$kW5h5F6FuLY>te&b}7$|waX)*%TqrpPXmcc+;)?MH%mC4`rg|8y7cL0 znOV+fu4a{4ZPqdCn)S^3W&^XKQ}+c*sdd18a=pta2kN~h(h1w#H@kP!=K6fEMeWE6 z=o@WD18x>-XAmr{FY86PkA zV$Tq2Uc}Ugwo7C!YDdx;*ZWrmU>az5@Q!0L;Q(_nI|Zs zj6c;%aRtsw-laOp@#-3@yh41uq$}+f>q>>B=26rLG7-zd6>;spEPoxrWL*> zW2jFm+-C@Sa&1ui{R|JKIqDBf2CiOo9%iU;1ntckh(^#pwMWptoIH)7hP#RO?k5A> zAtn7sf9XF4$ofE8A0+F8WqpXO50&*{GQJ-n;gQlzjuQ1~ZxF>k+~eyxBSbw$?2i?j zO1pm*)-{nI2@hN78z>JA%R-_XG-a z-IFQIb5El%-#v@M0{0vW3*8GSEOIZUu-LtdLd(65!V;IRzZj+N{S=l-SZ;WpQ?3#T zw@JeHO8rET=J^+N_QJ!mE4Q1HoVGYwMy0DL&A|tWOd}IaAa29j# zKDw7d&THhkJY8SEc_iSQAy=m<#F=grNVD0hGN;@mBwN#~=KyLI6au?$)K=wKMx!rsfpT6`sU~2s|45 z(k~I2bcM&+LMxU_Qx!3g=OmoBvIOMG2#3x9jKnKsm3u4IZgozf_#>|_ZBgR_DYi|; zrkl{T#Zl*~%xkpb%0)QNK>lFfz6Ti)&>8UPL|!tG;0#b+)o?mx%=QY06BOC({G$4_ zswQMhqJZ!q_?#&3QtW?1h9!nqx`QGq7Rxd|yI02`djVV8v&g!9EpI-qI5i^z@e_MZ zq{Nv`#cAaYX-niZ0iEY$kDA$G_HVGKeB7kz?Lej`GZXw!irQ5<9C_G|1a#!pk*Di~m4#YGM=5G2 z|9sl3v!A4{qKkAYUHvRt?_ebJ=)O#Ta-7%gV>%?|4!7{S>L|MAPoEl=F1(CdlXQ~D zr48fSVcbcEe?{{t)*q>f@jS3AZS06^TNII@s*?`4c#|JSY7q6@wYx6MV%K-_jv*^s zI<8iU1h;qrcBXd}6AyUgO!@x20N?EJPoS*^IWstpmWs_rB(s9}Rq!k`W 
zYfaag2@-OQTet8}BQRAPl^jJY7Q0!J^~3H;A`;Na+T>FmY6TN0md-&_ z>WN%6!1G(vil+ut`7LQB;g2gw@%+{Ix|mf)(3VyMe_Ru~9zbhJBc1QjoKCJ(TB)xn zqAvE;B&17eRUMh7I9?dtLJUU2)Sw$2U8E&O_ zz$)HByL7W3r7gHDH#$%DFJ;%OGqAq^?{7a&c4gcds@IG*(CWgXB|aSAw{cl*3)`**0U7WkpkA00@kBBt6A;V3A(s-z0q#HyW5+|wtj{ay0ocbHQAhN0OqXeSbb8VWI2YM!1D(M7DcY8!Zgzrz zR&rA#13N=2P1qNykyQa-Y0XQW*b0XEYP@5!q1*(BjGNUqE?{Fh2+idnOvFJDkb}Sn ztA!MbB+h4;A{@wmf9mFPQ5^j#z}y6f07yW19$S5cBuAG%9@xC3tMA^Ezakv)>+ zo&qx+Nv#*p4Afl5;fZM+U9tCl$W2#Mr1I9fm) z&2~qMU$&)Xr=tYkw4BML94#&$?o6Zg5zcf9kCOFcB(-BDJWj&n4YzgrbhC}y);r#9 zml@=?&kW8!p+4n>oh?p2p@54)PBuGOBOB3rADb_w$OUTkPN5X_Bq{g1Ig$!>d*jHA zC%!yFOP@Z5T7crSbK`E3mRy9soO?^xu1BG2%t4QAXCN(23Hk9-vp045@`R^r4<*H+ z`Lm&hW;^H5(xq9SImDPjXPlhYjZs=tDXq69Ei&T2Yd@#-UarGw)|~p#5W({S4*6d= z&$Wy%XousZ|2OCR!4{oFg`_YQqi-JUxY%z3M@zP}yt5J8u;vfM#tb}Bi8Op9nU4W^WLH8L%3vsDMU&Czb7 zr(tW{ma648P8pA6cSgzrI%N>2Yy?MAkO_E9&yw|QSx*O#(Uu&;I_@vZLqmri3o=R) z-7{%=R>(M>-Rbfembiu5(+P};{=PGrpfoX{u``iY%7RauC8p5INbUhL%eY!iBP>r0 z$oHnx3O&_DX+D@r+#Z2n$-X#937-_qq>U#B|B}rq!SA$X6xA)mI90;aBs@L%h+-aX zShDkKS=zK%3nvEURMr4Jz1}o15jToN&~4V4Gpu{YV;1}ainCov1IpB&*5g8R#&H` zsb-?4niVzGY@J!*EJ7#em=p^~-EcKy-K~HRbM432KYVq*IDX7IGqFRh6wxU{mOjL`Ht*OPwMYZ_& zs21ZHt*91*qFO9%ia(ceMoh?l>b>>W>RTSk|(M1)r2Y;=epLe1zoj&o&Mz70yVM3<2wr7y3&pb z`f+s|S{4Ss)RkSk@mL)l8k(UK;3=wsv`bN9UA7FV#a>2r68rA8lj^RM3VZw>2Hkd} ze^yT(A5Zr7QY5`v(yE$L;(f=!bHT*=Y#Fx%ylmA)x>6w-x4eC1*}IZ0pUHSbXxW=b z!QijDVkjVnhKPO0+E7!gL=<3ADoHN`Hvz4_hsdU($Ky>uj#%9t$e7cQ_r;hqfY$o+ zn5!1%N#`^`=B*2}WUqmuEU1vZ*f-cqhS)*DA&Pf!wi+=+hL)kyK!!=*bcB(taz`br z-F=xlE}hTSwT|pbJD-x^5$`C;`6?^nQI~qO!0LLk+i)W_f<4WnkBmAaL=8tB0U33y zW7ukx6#W=6*+N(GkL8W0%En`5z&{@niW^0_h#9y{ihOO4ZT5!#SkuDtHb@jw)#1O0m@SuFe(rjFVJvs*pkcTpnh;@o5?4 z1$dAbN$$vdw?M+}d6tIJ4(D?p=S^^=@(Jjk2E;v$ba591Qz`8W$qcpOz%%5nR=TAfU%DorOg8vp%bG=F>`p zU`!pUXjgM-GV4lht_BS=>r#fzgJYwB=HrP)WO)r|JZo>#NX#I*mc}w8>klU}i{QF> zG?r)mnMlkgxSme~vkp1IeFQfI+?(7exxC4UZs#RixM!Iy%~ob>vyIu-Y-hGNJD44< zn}4M}Pfz?xdyJR+b-_cWa=aP`52H74qL0@t{6w`rV7JH+9v-pZY5%6dnqucqT4@$M zk)Pl#6T~E&ZbK&y5l)n8dGL56IVh9lheg4Y=>I|bu!JbbUbdCLC#rNRO z6u0-xS_ikUQMLWPR>d6OD{_4A$nlTJbS3h9AMN?R+VhX9$n{}0bOj8Y7FFh zoaRzWtK2rmgt_CrVR^=X7%jv2aV^}w5C~`YjPHBWQL!t-9xQ5PBT;bSR?$?yF z*1@?d?S4sXZGt0JvCYT6ZFXbd$k{{N+6BW@;C@GI?Sp~z+$GQ2J7mqMlvc|R(^J(A zr|lq_w;Sobw8w14BU3PhlJ_~KyVPdpOH@*#?#NM#;T*Zc%!WSaW3J+pJDr(MoEbxu zdGNtis!+1onU!E4?Q*x06M*?DI6$-j$8;eU_h2wy1iF`wa5uBN*~9E<_M&s(K4xEf z*1VtDKRLj?EVW9_p`^LEQqmq#Nms{83c?u**P#cI`a@K~?~@6uC!fptwzH9(gC+~oO*TYnvV8uzLdod3Oi5?@fRf&nZcv*8%|Yg1 zbBHzj}oCVU(3Q?TJhkpy@=m_&j`gfFhG*zr4&EeKh z>H#XySg8l+dh3y+^K_2(i#wuvuwQPEhEuqibaH&Yw=E_8$pOi$*M^RyrpKp8)8o<6 z^!VdgUk1{X?>Tg}iB zI%^*7{TbTw=P1h;sGeVo(Y{uzB zvnO_F_S8BwGMIcTotig0ErSV&2NMy$lZxiu1ra3TFiPutBt8c-8IV49A@Z@yx}9pk zUX<_$EcRjXBUQ9!<_PO|`MC>@-vz-(j-1|H7#+VqV#n{I==fb+bNnuej^Cxx@%uA& z{6PBn@dbvriJEZp8QT;ZduVBji$aX4`d$C{3zNchWunc!Kkin;&!2>fH}>qgP*ndH1HU7 zztG`}jRN%c2hZixGSHPK%?Z2;s^hkVfj5cPeQz>_NxIVgF&HlVtVa(*7{__X zQFy%J)5V3+(G9a?>TZgm8%1T)@AZuI6+~*WmExOH8J>lxjaK@0QVjKcT3!-?C(<|N zrDoFBZ1Ki(Rfe~ZC`ZC9?*Ogm8bO}ZHpsWSCi6H_j$LBSuN|w3VlCiusW8^UFjk4K zNJ6^2M|b6{;`hm{z~=mxOyoUQT9WUQ%NfFam(q19oo_Qrp{#Za)rr!+shWUII6HwzV3 z<36;hbE*YxY9fB4Prmc9Y%0gHnGsZ)`!ZD~UGJ>6nsbfslH$kRTTo*()R&Y|c?j2P zc`|WV=Ms`y9>-E$Wx1CTwJK&RR@E*)_S{wr z9FDf@c^qR)Wl8+qhS!e2do2DQI({^Xo_uDl+R+o>oLo0auDc}HOOop?VIRZl%XbjE ziyQh?^FCcwmB&wz^b_m;boyouppMAs;jW>r1Ep4tSC03N&<+X*89`I?K+w!O)ogAJ z=1SDl{gAc~;nQ$+CtY_nTUbN6h4sq*nYKHMrnq$fjZX!GHs+w9tvM>w&g`d*E`{hA z&!AHu=@>gxuic5}a#U~dcg?J8=@q^4j?trsk?5y&{9ILEn|nPqo`c?b@&H=+N)iWM zB_(gTogH+Q0(9R5KLNDF*#lg>FocU?-C5vp7hp5kH)IT(yE}QDz?z*9JEt>msZNQV 
z0Nc@@`z{5FUjE^YO1RXS8=Nu8kaVT0A2K8mWeA`%lsKKE2s7l@&X9o4P{3Vll%b#% zL6jkZFhk^`U(kONMFsHDaQv3_Hn0hU@!< zbXc7QB+)BbD2_9eKAn!x${5$bhgOvJQc_~g=6=Fzi&zDa6IVJ(y35g$Jm24LO3~#? z;qEDO_%gJBJuT0$+Y;p+&#^4Z1g$mD547!;Or<2*>r0&G3;{#iY`Y^tz8tW2j>A`c z@luSYRB?UY%e0@VkaP+-e|8&3cIVfU@a{?8-Ll)!5XpIwUl`gOik;2`xY4nZs-$GP zM9Oz>sTc}yrX9WrT3VlCvzF3hg9K&VDO!sw9KL2*M*j_ajpOvDm2&P1ZF(UdzX+j% zykXe%T0Fi-%s-o@M;7u0s%Xwg(9)3Ts=E?dOAuXk?da;%j*ez?HPOifwNZ5Sb#(0d z25e`o@T3$1;`)IkHN1wUm4Q8&pwVao=jgm7yT!JD%8^6Rn7bK!e}VY0F}El3p~F6G z!mU93IFzVq5z~PZk>oK~`xnZFeH3ZUd7facDo!3j;Mi}LND{97dQOtP?%7)lBrSTo z2kr3&(rQsVt(F3G%nYCBDhX?KQIgxOFHMANQL7Q0#)GIufT$L=;yFfCi(1vLMXl*b za}lH#arL9)9@U~Cszq&T*P^zyYf)0yqITR+qFN+RR@qXEII|si7i%p#8Ue}lF_c`# z!z?{g=nQ9|b2P)gB}<^wFANh4&>?l^W^Fx+4ym(rQDKK9AgxZ?QWu^mT92osEp<7G zfxI9`o*%F`?5Fd@(_L#ftgf|P(yeyG>Lyi8yCfC41Q+VVhSj6COVZjUJ!`w9mn2Ru zNgYQer8kyP#wOAQ7xx6hJ`(nou%Co9CZ)osPQ#z`#eRcRAiu1n-&WE;z9mqeuHeU+ zQvde{IWH=2Fdg3YpZX||%Dj5!Xj}6ZcoYM&g zb4jeP>L$-1$XH)j`TSeWEbHqWpSyx=r+b%S{uT-a4UW1YzFq3EB8oz?t}}}g;tU~;=-sCE{Zzg;;0iID$Wc$VF9TQl;Y(S zLy5X5$XrS*rPfH*Hghqpm02`%PhClC!}!2k7xQUPz?qlHIN3Fm?&=6R_+fW-q%?xC zyE;m0Di_PXnsz&yo4&dvusI!iH5+ykQbR^cc%orFqn@WDdXznk+U*8)BPH%! znvv0IgQB^HVIOj2&DAs=Bt6uTQ4e)g)I%K|^-!B|)*W_H zo8&S_)F0w4R$TunVs7})%l&BQvb1CX#K*?x-!%+(E>9}=1@vbi>GQ3V|9oLuRt!k1RHY-33Od5k^3T!ycgun)a1mixw+70(}Sx*p;4FmSAM zMWOV&$8pW}`F9M*JJ;rCb8mkFJq4c=@YwkZiexKv!XeU+u2P(hSD`wpIMtN)e@!l6 z+pa1xeC|C*I#-rS_wu^pDiL+LuSaf@k^2p~iW+sgZ*bkH)$5)Zbun8}mavc67DeIn z$a7M(_nWw4#vPP2+_&UH4E4kC@yX6rc{09k$M!k7;;&O8f4waiN$j~RgA%sduSVlrtKvc#Ikq)qwY6HQ>D{Uk>N%eGu0Gsh&HOEOsixI*G2j8Ut;fN1fb; z;>mZF4KrZ zC2lK%(UOxhBs|lw&N_=4%^>GanuU+yn-FS(@83l$m1=6Wq*i4;)ml=k)~TJXQ=`6x zjq9jsFs>UJ*VD#hwXtOF94VP}1nykgb)5SN%^uE`?40KgpyPMGtY0AEg%V!G&)e-% z{V1mcrFrjClPRlCdkllSi<=VSw)r=eBr&8;j4%t#=LDD3-37zG1 zER;#?7-~I9p7x$i{fpH4u~h4mJl#E~W|A_Fnu|2*b7`6@=I2FI-1E67672$N5R#({ zc~T<2{R}npb2{&$dv(^ujmgP_vxm@esBd*v`O=1 z^7UnOT#cwFZ4fn|hLMLamuSNN^YYqLyesq+kAB%irEFw%QQ1-j8|zwiWxQ5hCCIf( zhOeu+!CGDE1{QaqSId+m9KNoR$iv}_FUrYmJ9Q)#^SZ&jS2vZH;i*XztB1XX6Xnoj#1*3! 
z^nT7fF>0xeCe<>M1STVaTd|lzr<`rQ{rc|e7wR@j^JtlUT;xp5kt%s{bOQ3FT+NNn zK)#YIoY5)B*Ql4FSH3~zrBU5)Nqzez&eVdSIjw{jn%mROEFK_2991bN1bbplPK_2HqB)b&jkGN`HPCdOG=bzwVy3fJWQo6c_o2=GU z)r;!SP=77urj@!c2zdH9O?I}LDS8OF2E!R7M43WL|96{=W>yRGvHvK=(poJE0-jsm zuD<2!ceL_lc6o;!l4yF_CYoNhji#6FqUmM(XnNTJ(@RNZI#L zrjof*&3o#}XD?mN@2pkLrTNZQ)$EFB`Z*gDfUp7Ig?b#;)j1gVc?Rk~PVVfjR=aYI zqK~!Hk!uuvqukDoF`XKY?g#>A_4%M?Y%Rm@`NODj@zJC&!gAU%%_ zQSOka;R}?^Sr;m|8=XzeS2Ux&hl52*=C${trcGBlEZ+xOjKvZxmSREABUnH4Ekc(UO0XH6;r`*`uLj^(5C65&r_Q#c%0&K<##lPdKp7z zlYbyW8n&c4{E3YGg~cH({-!q4)mkRcLJafYr9uo)Yh5|HSPsVzR2AaI6=*JlC?`3o ze8Zsoj@Dt-n+i07hCIXg2Sf59;4}cFdEqtL+sf()cHsaz^?8gktLtBoJhBQ#ELqi&*&)Zf>C5T$nD(Pp?n&gCSuzE6(Q=Zk*&eVET7LcwiA<9`# z9h%`kNKSc3H6*7TDYLL4)+P*rLv`(Z_Qj<@!@_Hg8}(TCNGcaHYF8onDsm+HT2{OMM$R}Qdy3OD$5h`%Cb)G=E+o=e{21u zoB&vxF;xROWAf#^1SD-xoc$E4($iS1ry7+jk3R6j5B&H89)QR(`xwjV!17(SQ)!MN z7mugX#i`wQ(jJsU`SV}9g&yc=N7n%9Ie$y7B~RjYdxoMX`J0gq-|sT4<}|XeTx`1x~j_k{;IcQg%1`0e{~_hM(e=R~FS=LMx_4op`XD*5rDpMb`{IWZ+tD;kn8$XtiJ|1?oyC;DH5qn66gIZz7;MN;~H( zO;Yu^WJNB$IFVjQz+s7$sla`T)=S+D6qdQqQCKdetdLUDFaK$;Dp{|V64w!5)^+JQ z1EZd7sZYN%ushRR2acte4vg?hofEt=_du!v-;iiQ$5P5?sq0WaTbHjPH{hsz4Qt9L zMQX%TIdvn7B%q4~k|MPtBl^8(xz*%+%1#%j4ONk*{6LYqHP4+y&`fgM+?`76E!;aO zY$->(l{<^pTg&z~V$)W_cCw|t;Z?YUyh?YpM^9dQ)lP5Rd5Ru>v^wx_&bvBuD2|SN zT}-W|7ZVb6;tOu-u{>uUL1!t#(`C+l0=X`y))zZ`WvVOJPPINiSVYi`hBtzA`aXj0 ze5ul+{8%7f2EpN;ppH(?wEmE;i!t)wG7JA+L4|_EBs@FQnMij`HMQ zh6;|FZhlL_<#LwGH@}nYz9WXSNH@PlJd!VlX#dH>vEmiO`H3PrT8_x>N{2H{-wL24 zc6-i2f)RZETYU^ThFz;ZDHgM1#q1L($5EGLs82nK?RbgpvkHL|1U_#R{7$irq{)h* z_Ner)5x9J>+~qe}pO~qp@FcfBg(r(QPLYFhgoLBqCba!ju{_OX%hSb&qs4n?7+x!9 zxYye5nrP!zDX*>F$!uqx*_Kl4ZgW#})E|Y?+}xJvt)cJ#BDtu zYE(7d&!tzmIOa^E)%4%)OMyj~4GeX0eY-zz;RCNO@kDV%xxA$)E~QC7miF8`X=zDF zw|QOryYPJm3A)jhpx=5W=yGv_-bv?mpu80&=8|=y3k7tCRwDH|QfEuCeK#!wKDy49 zZg{mrvOC+A!Y&or-9sDGTtCE)ZAcBNw13U=~#yQsW!+e+J2h+R9_Ro0HL zN~e1-WuRL8(jInoBD=+;)YX1jKvYlS>j=C0ksX&;o*dBWHWa(Auxk|Aad{i-bUD5z zV%HOPO(Q#wubH;vJ^Oh;Z9N}fPPc>D4TW9D+IF2{@pTrvBVpI2ww(+M+ArP2?kL!GuWcv8g0|}^cE`f5 zS8coAv2^>0-3hSkTiZ^4!Jz%pU+hkT-GIoB{UX0v(CH2myHjB|II?5E4AJQ>B^oO2 z?M&DWi|m$>a)fSgY*?xzZ1mnJlKt_ZFgeK?j*6h0Cp$Wwv)FmYQKyUyNh9WYGlWLkt?^_?sT!c40fYy z+nu4)<$5LEyLkod&Z=!UM%%3*yR*g5SHW&tJ^-oui3gxE-CR z@E_!n_+ixZM#cic9)9Xc-UQ5+wO90$K|*}>~4YGm66>_ zQsf1M;y<>#TI}fYejYaD=MCJivE8+?_^uPXX|TJ#c6{rZC z4W{(;HkbUinCa~`rhKgNn7+9Trqi`4AMF`&)0wIRZ8RT%>8zM3F5*ZAIM&(7z$%#D z6*HYv6DvDwE=(VU={#-9#kxCgIv=KMV7ee?x-f2f4@@6{>7tnFy>Zj~V7d;bi({rs z;-*Vs`ZP@MkC`run=XgxGca8dGkqX#x)P=vV7e-1y1K@ctILC^E_A<<59~v7=Ywk& zSC@xtO!*M3L9AO~x>m=^2lkN~Q;zjfn9@Byj`cBZ%CSCPW6DSC37EbH({eS+hMvXX8K&*^m&-R3)9UpQ+y3bH<&Fj z-2u}VW2P^~V|^K>AHejLm??f~tYdu*rgX!Ii}iZU6dzX7rdwh9DNMJ;Oy7)W;4PSb z0n_a<)3@WM@4)nHn7$h`eJ^hMK1}J#J?C{t%yehmbQetb!t{ff>4$OCk6^kFrn_UN zAID8Uf$4sjei}3VEN=QaOb^2Ji&ocvJ)f^f5O%YZU54?Tr_$;D23YgZ`gXF?ctd1KXKcCVH`rBN5c9NHj=QZge@d&EnzzeJ4)C^!tN6G zlCZCY10)QWK)`yClZ~( zo_i(9cbc-8jm2G9%)w$V7V}K^TC%*GBFeWqdjYrfe{C$~V7bB+!qIl97VEutTI zM47o4>LpO=Dx~Rf`;FC@&@CH@unq z=|>>eK-xBk{@X`!z-KY4o#k9XOUL17TIt0=uGPo&%x-WF2m-l=Uhjm8PFkxEZT+DvKV<_o~-YMmYbHcZ>2avDQhe z;dLd}dIr5Cv;G9K_=D2$*_RE(3s*Td`?MiBQT*yGq?JZS^eqgl@jt{B{x7sc6COm= z1k3(80+U_|;nNTOjHWADnrQhPd$X$;W>^8IVyiHP=1~fCgwLS$7E-pRl3q(ARbbLZ z&sOVcPmWLGX?;KSEbEYjoX*7!96YMW4UzYroKQ) zz_Ge&sTFVzy(o8yDAtZTR>MhBW$)zhlk=S=i7t}Ut`6U)=|=CyGY6$gsHM53=4j(M z+K(H?UDSQZh0^i)^fR$1xM8fUOFuWIL5^Pv8NIa1GTx#56rg#iBD`x6VAXq=dJjjA5bcq;j|^P;y%tptdPA_hH?c5S zMjI1#c-M>lumM^Af8>v%U;}Lp_^BFoRmOgfmX^B4N~w$*|^5kzZABbBn7TB#wl zUM!KND`-_M$&%eO^%(A_nuc$|X3eMEl!%lqt>($1AfFP;ksLf#Brm&l)H6=9khZ#3 zYkC17zyH=#8?h8b>n$0>%D;;uFO!W068U?G&sRIJbD-l(s$JNW 
zlKA-3B39uhinBaTzrr_)Zs%-&VFi5OOu%Z-$riIog_x8~Ws~n=A|NJau2srCO{HW5 z9~b!zz9^>{4E1|f@-Ri3R@6uGhh?k^k1o_#IZde2&?h;`qJ^q*nh{k=!CI;+IdIkd z%4pQ96R8`QdX*@k7W-1I3-CFw-AS(z=!m*y8MKelMlQk2S87y2O6rhYnm3mfqC#=5 z2q*~Oj;wo8YcS=at$j8b*jp)3B?=37|;_^~3tBi6f+tyO&`?-{jBx}7PJdxLrV9z-snBe%`pG7`BXU+Q0WP}ToAR1<0VmR=+@&{M8(Gx^9^e1! zOO(wA$m9E8@>=?wBN+Mq7mK-k1JdLBhXaZ7I8sl3{ORzwZRC!!YT7KpZ|_Rbi{NA{ z|BBU6ty?-r(AGk!lpS(x?ko8nQikeU>JE>I4ndv$+8O0MJmq+X$2i4D$u}JX!av`8| z0YWbL*HG2ebPRQNwq&V}#CT4neHtlsk74D!w4_y+{oyqF465$2D*fd}pbuseWPJvIW5?;A>ENYQp_3~AjMnsAw-#vq`pTPA7@_~aP!FYo(Q19}H5 z$W1vCHVaaerRG64g)OATw3PK$0oRJw(t6s+_O`O#PFhlXS#J=OkWGhxfAG{%!cMZi zv)FWzEnVrE%RQ9K_G$hdQ}=Q%KHpg8d;i(A=BN2SsNB$y*R!ViK4VYr_B^_k95f~B z74YxNdP`z`B+|YT_6yq5_WqLC03+ET=$LBg^-#%1Zs%lUZ*-~&|5zmaXk)UaHz?W4 z8Tydl!5uw~smKI?<3}!h`Va=6|K33VB}yU1QS#6KtR#;|zfq0# zIR{ETWl1hlwQ~W`xkyV9!d#e9E;6-q0gzl637!vT(QJpFYjDIf*@l(Vi}(*tc08?E zYGVs2N-lMIx+s8}qU0H7D#&j{7AGh8;eY~}w!KH5Ob3NrOHN79aEBYC>NDD$3FIqa zqY}GmbFt*$8`_)|l&~L9O&p}nB~q9l0{$d1Z9GjjmdeKcWSec3@#Z5aNL12*S0>6Y zq~uuTJbkZ2EkDtjlnOq~iau>_<_48@XuX|t3~jEG36DXes*LB=bxHX?O~>Lq66;70 z<LFCeN*n+-)opG$2(tEY=ied$`VLz@O-Lu8l*K|{70?L0&KHWI6%(5kUm5z$oF zYEnj4Z7C(^Gcs%{9_HtbgJzW^oFQBJ#MDsBIhRG2{KC2!b#G2K1vHl<=_QtmgO>FO z&P=k{O0KHVRY;O(VWTh8Dz z+Q}JD62A!2P+yT0v}c!|?XY({$kC&hO&Dfb(6NfPk9FwB>P9Cyh5CcHmj|7>5}f0_ zPTM<+KlqVBvm)rim2I5!1#Oq#y(yO-3p6VO{sgjduJaddmv8c@Bt0M)bmyeblkE49 z>=&4p%fC$QX?Rr@{~SH&bqblBpJb=@mcw3cdg4bAd)Gak#6CuODQFociACC)b&G*jSDQC`{ znR{mD%v?ybKl+=o=pV5+YAZk+1Mdj_VuYH+H@ zGo{r`bq)P%nd&;6QIAd#F?6OYSKXZ;lNt3BR4ZC9PtW4h^?driRJJ49AZ>BB`whh2 zNPh7|%yvebC_10?un)5qlL7I}J*Pq*-CnWQm7Zx!0lK%m=- z$=_DecqoGOHfbNZ+M7+ZjZM@c?~IJLi-0=hijBz=S)MM*VTUxyVfGpxufdfac)C;a zJ;xORwv+izku*j{CsBC2w<8U%+^X3Ud)FTk`&>5u$r!jSb*450#%{90SwJFnPhpcE z)-H#hJ*@kcx|m_7GVDeTV`HaDZO&tA^s2{Xbm&Sx$#LC3pt&-c@dTd7a zmF$&<(3p$5bwK;1?wWhCWRu7la|L29%D^)#1z|4AY|cWsr8yVPhy*nuB(Rb7j5$6i`R8-`DmL}RuqIwbP(XhxXgur#*%4cbFGutCBd?j!`B(<*@<=xk2t<*9;G zAa#yNlL~c6xpRd!LOBcDNYPG6$5MaLwHbNo{C0zKeDNqZ`;xBMgKAk9n2pFE>v0)j z-3z(hSA7Hy5GIbHa4`ve5x4iMKkJY$A2VkH{-#^qnRxkPlUCMg$jk-2gqE#vF>tjz zn^3xemWv16KHy7D>|$p^ZULaLY^Q`&zs&3suC(3!A_N4w1k*k_X&UM>yM_&QxoIe@ z+qrT=^a_>(R))b|Z2T*g_ZNKqUZuRN!NbsDgHy18&33z1ktd+hmR7vp{M=xEpx>5< zH{mMpo0UdKyCC!5c=Hp?PBl1VSx>CbCpF3a6Ty*cax$d+EhYp7W=(FT7Dl5c7U}g# zQ{|yf7nu|L|Dpe|KCL_#KTxBG_`&9XDJ*vV=YXJ87na7Ze<{~zQLgvYPvJ9+{PV$# zT$D1hS@+T@BmdG6C}Cu1!iAB4ZBm7mC`lZ8abe^S3?u&*j6BO34qJ26{n`4@c6N?4 zJ3H6G{p9M?h%C*)mUw4yOZ+5l38wi|lQi`3lEb^0GSdWGbeC!YzAz(`CbLwoMmR2~ zPcg*&DI+^B?;#1*NJqI#Q>f)Dryy0h7sXtlx4@#?edn^CSv6-H7pYCpn%2WAiin}-Oq$Ol&1L#SdwFC|C|~S;8HIVlaI^L?dK?$M zsAeJeOWb!5@*t?=tCdxgC+dP)5NVV7oP^8d1$2fx|~yE1==Cx-F2z{|ACn0%7N zDE%7vGSBv^$F*}5Z@$9roqQ9#LV1*Yn#9O_6<8UW{eQ*Of2*(fb`0Z>fv*@56Mxvm z$h_^n3Ssb7Lx)#EhgI$p`}pWJB@@HT+;1T8Yl<_2e-BzLd$qmBUTd$5zE1O6m%G12 zxLKlCCx=C3nb!>`?g_)pZxH&1k(8%~e(>uQ1~ALc(dwHf?X$uhhpQMS%r41oZ!tmQg8;%m;R`pfgKM3-YqV}Zcq z>n#O;4c_oEPFQMK$cqyRv?%Lt!&478&bSAaToK&~Sa3cISMIp8RF@9Fir>in23%N~ z&A5kDki`jz^giqxlj>|ib#4&ZVI4x;mrrQYPU;btFFi!iC>LGE!q~NwH^mrP-waG{@wU>{rlm1{!k1FzCR3Z zKsWR|;m6Gna5MfGxLJQ9+#JH~K7F;?2hbNjjVfQGwn&0|Ro43=pfPKzR$;2)%f7ek zlrJ-$>y`gFLT|u*j379Uw~0k2?2&51&&}osgOMQYhB}BBZdJZ)OZ%kqWn}{Up5zBd z$b&$)2BEVni0p@hjQwGhwP(U$sXYg*C30;FDN(4EGR-!WP-?cJ1ot8LQ-T|j)f_{K z5>ld+lqe$&%IQ{+5|yMx6)7=%zvX24SYP*{i_R{L4DmJO3P73*2WBM9nx@xz} z=4^uA2OrbEoVpmw;oo{uatLjg{*rsrMREdkU+oLX4iYgk-ugIM@){sI%0C1(VwFbt(RXN=MLuuk~2B| zKevcwnKeOPKhKn9HcK>zC7R0;&0|^m=+0+}7O+GMS)xT`BkJ`2t#SS({{VpP!NLv3#R|Cn9!^;~@7f(>Kb&>MPh8Rx+zq>=DPa3#_KQhVEJ` z7~)J1s&#!(qsIodx*@mjRHPzlGtCo{G?~D9zO;evM!K6Uy}1&!>m}{BaA^2m6Zt7n 
z-z#EkZB1$Ce+lVs^PffOwv#j`k|;Y^#+_u2lj!auQBEdNc3VN69xdi*^eG#W)lBCF zyuXKx{^!cRjDxIi?#Ojx+#5FMuRzp z^@x*EHLAF<4!*39Rh!Hj_~eW$Zgz##7l0yKzA;d<75E7LELE>mxH&<-tjKIkk*IC` zZwa)54&4mHpd;*&{YJv|x#yyubHnc=#(6s>v1U~pepgT(5Di=Y5a@gv|7h4Ud<*cX zk|B%h3yFM8HAzlGRi*b1LNDe)NY#p}zYCbd zx|%Bd6u!KT=_ebafU_b{9Qa=bF^($!T%vs0R(`2sJ76cS3>NrY5%F?`VpCj+Hj$rgxc4 zF_D%`?=eHdz2*m-O=YaQA1~nNbEZHKV7M6LOti=9t5h5Eo#)9#I9HkaPsfqrV4O20 z81HNjCOB(@iF$%P$$A-0E<^Bjs6QticS4i#<=+n&g0sUvi&^btv`2cmlD4iv!Y%A! zW}FN@>tfHz5cXGyQJ-I>M{!E2X@S z)@c`TD94u#I2}m|Xz?5f?d>M;s$l8Vxam8jKY0BYQac8 z4b_qY6YFMTJ@k*Ee=Pmu=pRr21o|h^KZ*XyRv5@Y6xtt#5eEO{CM$EQRF@w>#;qAy zt*(&ni&NyKSy`?ovM;cQTNyd&$zwxmjml(k2G^>RVoQ(8N+o!3h#e}6PYoE8l62Op zn(T`}HKI}j(wqU;22;U|byifOr|1>$Bd9(56!Kju!=mthPGo6Dct86Le5=4HX2fS= z$KylE63!2vL*VK_WaXOR3Ak(7O6%yJ!0NAO^*315rq|=}wAIFHHgE*q5LdGS?^>*8 zRtE9L0-)NgYI9O)2fiGu*u-*(8j8eeW)-)qsYF914YgBMFo_nTQ*g3fDAk&iuv1iy z3A*YeRp9fsK@_A?o)pYWOL*#3%+Kycd3^Elnw;UJTsz$kRC!@JW15KpYbpfkg``9OR+Z#}nVb!W z@r$!$3YWe(n?hJ#WXWZ8D$ZVp%;$)5Qw>)-a}|)7^Re(@tFm%U6p;v0b9vc3Rv?H$}}bJv!9hU_FQ`iVQn|bt%fQov8 z%xo8hK{1$Wbo6aRa?~O>dP~xQwYiKaLp@ye6uIArFO=Tp)`#AA1z<3tW?1zv8B&HORVPR@G>BCfS=Qg(|L;vFVYS>2 z+U-69w`LE@qt2<#%0|nYMgY;l7ajxRHX`cavrholEtWv&;2Btt9zacU;qwR+^>M6& z_nrc71jpiwqSJc@pozoTk;+IG7xU`6eBn8GxI!r!&!VpY6*N?#nOET}65VZqwxXFR zRJR2hix%1#&8_=?r-SmKUh z_7l#XV7cEyoDK%vpbBp!fT}|r%D9xC{tsu!{8CYaFA@eDqD$&BSrd%Rcw{E4`%x5+Wn9V|Fu!!zrX0wFZEah^a z7{v_7&IF-)A`w?5>hX|rtW-pnE7EG*ZX~UqC{St{5KFBxO07u5W4U^&mhV-Fh;&6Z zjwd%hUBdSah-G9Y-}7%mqzO5-8OE^NgXyvhBR@j-Sqvz*-bX2|4#6K%$%O$4G3SLYu2J+p1Ha?lC27$0==yX6x}k>@ zV9-O{jQ1eiERBKW=;rAb=oZl}rdvX{6xM2WJ4}0=gEQp^eP8zmGWW9fpfFPAw~{ii zMVX&Qi9u!aFX+7=cMDFFvdiiAWXDHh?q4>SW8;|*1r~>^8Z)v;6T>tkzfNtWjjeTc z{6Zz*c+ZKkrE0UD%yl)G+kz#LkVgI;nVaBq97o{09j6w4(mBCLq3}+B7c)$S|$X;3|R{cq|rxEc*_WQc$i* z(ETV4$YaS3!Iu4qT}f0Rdd#k3%%TPfdlDser@&XtUO!dwk^%xwG~C$;D zFcn#ma^?b-X5gsq(f$Jbx@v5R*9T9T5IZHETuo!D8!SL0Rs`q6t)#z-{vq^N(_ce> zE&X-D`-oZ3Ck?^x;BORdvDHMkneI@!Ep%Jyw$U91A^d6NTj#CC8)KykHUy}5D$qC< z4@+N+IiBtWx)UuuTo!?Az14`v%bjJS9i9MWu2hiJcrBy(#uVifZ8USDy#awEf-P`I zvgo6jXcyhlfmnIy4rHT7j};Z$TlA#PReAqTEp{Xtv9QS+_1fQs=-6uINiie^|9{WAv?G@) zjCM{(RF{%(er+haD7O)g=q}uS@OY6RSql%(gwL08SX37b3}hUZ88*9k$QG9ww&ls} zM#QtF7IZ}i@C#2qIwZepqR&h-%(T^HnrTE(nrU`$rny0x<_BfUn=WE)iDW8*+)`~# zEI~2B@E-3RJfz`v)lm_hFIlP4DswJ8nw<`Xy$HZji*@z_cwE`u??}W__RR*Fs3;i45E@IM)$tX+cF7-cw=gXMQa?DfR<@pkKRZwbIs-Vm+RrzvQZL82z@|E_M ze3kuCFa!qybc3BY6b5?p{pe|{0cETf8wk&mBDVdhQ zt0=WRDeN$QKiug1y8(uiVD0{C@OSuf7e=T55ZnElFT7;>)|MDBb_?v+f9tJqS5 zRfEWVJjuP9*V0`_a-YD$ugCT%d#LVy1zGiYBJO(@3V}8UUKzj`{K2GQmA??4 zv2@4zOW_}n`#$u^40C&1a4ARw7^m-?*a++EPu!9Ao)-5+6ULz1{FkmSU=h zGc3&%>_)?9C+1dmyPhh0j>lysp-?^17Tp9-q&{E7#+{aoESXQ>aYh<9Cv?qX=3hhN zP9H>E*{)&PX3v9ErWb$M@LBHE+ngae8?cAWqMmC5zK5^5mU3Jp!g@F|%R@o?|j2kU$2>4LvxN<&xM0 zX;3MHE;T^}X%J1$sjE#8K^i1S{UDZ15P<}d%Avm+oKZo4CH+*+RFLBZLe8;?ioB`kT9%rn)eO{ECZjG8SyRGOib6dcMD-3`)-NgZL#cN4}etFx?~ z#RKLHORPuHVS8id#4KG~GVu0Lr);STvy`cv)~SXc?OwS9l9y{49eQQXKLan=zO3kA z59Hj}fhIXiQSpiTyJ66R$CLCw)ReFpzIjqK{Y}*rHKyo%)eZBwfPD;nOSxeO(0pk_ zY{?lC4NuWZ*nD&)>SX<{%J8KH!Y=y#7>Tg6^haU}G$WfB7O_~1>0g469v4%a!P6R^ zy~1Zmd;&&0I9O_p$yFm0_X#AX8p^|}1jy-@oSeRLM<58NTfDLaggDsa)B|apZpqie zW4FH@sDp z@>VhHobZ-GDcwp$&Zl%UD5YC*O1IK}x>-5dz+P4=wHlSltpzBT!BW5BhSIGK*sU5P zIn!v+4%LDw)h1xtC)3|_!wo4oOAU=qNYOrtY8s~mcUrM=O4G)voP)rmaR^e1l_iZ+ zKA>@`EIs5Jq&<#JQGJgTdc0{0g0v~BJ0;?`%-T7SSq(^-)u4W69l@wEvl@^v zt3d;pwN=eC4bql2$jE^WLXc8ySkfTF2QExLe#PPqrCF~Jmb{5O!NpmyaCC@x0m}E z(0CWq^WX{iCb*cMN1umpD*h}NdYR}c_+)dnCU1%o9-M- zU#xzIH>XDn(Y-I>)&Lh;n?QEIE`~2;Uf5qC$;+n^OLrN!2DqYJZo+&98xF?e2OAD# 
z84^amWN^C5{9LX4c07bRFEMs_EjA-8JszQxgB1~Ki9Kt@;d1zvNk7#OsrqPXinhuK zD^rx^418GCkaz}>pH?C6acti&VTvL{fI++BA;2K=0%s;G823f2Qm@9F@?m7@mz2*D z!Jz%|h+q&|qaYKfsnEj6lwexQE`$-0j233U0ri7!ssNuxJhj^a&;=A+>@;?L5T+3jSR8Ss^}88?Yg zNm+)+O~^8VY>e*JH{eL4umN`?>F4ZLa=}da%f0zCv&!Yb zYmiW#t+$=DC?HTKm$ME_&Sl6S6g7(+SZGN{M#PAfg#Wq6NZ@3vlDZ5c1#S0-z@cuBB zC7~`d(&Cu1iaOoCq7m+vqLJ>0*-AP6!=U06d9(=lK zz{k_$>ar&xkmh1Wg*Eh<$YO~j9|rt}S2+3YQn&gSv@M_G@EHPoaJtBGHJZqJ7-+%%K z$pcu}aEnetAhZ7rJjDaE<*G!IZRwzF&4s~nwgmmzmKP(TB-;v>FV40y$=2A-7-UHv z3(30bBe2<0Zm1aYk|c7ZBDx=s2*d<*fRhyl1=~7;y9HouwDbeTj&yDUnZaaaKd#a* zWIUXjz!xsQg&3il%=jj@eB*Hqfqc$c&SSvl{^9c~GdT2MX?20h!ng^PH2CbwJOQ5@ zoH&Ac)`4yC;cD_ENb~gcU-Hb;H?t9$DT1_8L`kI@gcXxY5u_Oe zNu|QHQjG&EC7ZA<%}TKzk-EgxBa_x6o7AJ3apQUrq}k+>dgPOOV0nXkWs^CN9x4<5 z<*ZlB<5I6HOuY!wdJ!e{Y8lvA1Zjqaq+Ug7y;=v>tF6CYfh-&i8!uU2RTs*L%1O97 zPF01nc0_~AYNZ;*HKq18iMR@8DA~vzl8(BzGP4OWi`9+!oUAC7IG8wvqSfw%P_!Ib zgmgr?JEBvfKN6CbrRs4*%d(V~4CKTibl!*noh;I^~z%IeGcah+6nS*#X#QjW|-49Q- z3#CnT7JR(d5=H@_SEJp8|1!-QiV2Tc7ulMfgXBN0T+CjhS@2$EljNAbhQ0kz=C5q6 zt4%N_uxzdvN_$1~5d9jP)W#Gxl6_Dy#K;*o_d|}HY@2IMbj)I-`FQ&}12K(_IuJdE zn8$|0`ynT?CBjU%K}Setx?QE*biFBG!cH+v`RN7|oG=tjzzM4)92LWqr9Pulp1R2t zEn%vg4K!2Q!a|h&dedH?Fh94LAKXX5t>M@luIC;>Rx2_ef`@C>7j))#@YDs{r{+F@ zr(PS)r^xsQ^;C=VM?9PdUbEEG4SBV>kBo=QBNq6WGp(dVTgc((MI+G&Qi(>CNVJ8c zkxU|~3DR!gm`JpyRH7}COm#f`EGD3&_M~Xd>LpXOB`n&@4U{=U)vHG4w4^epHIX?> z=Sgz0{3b{u6KZo{z=>v&RkQQQYLi~9t@%IiS{=R($zDv{gU6!=(p@Hp7kdk(J(!R&V?TZ=IVf>v_sK%$)5zdg#Wjmio_k6lX6@M;( zR_Q`D8M1b5_H&57Ld{mi-hJ?`^oEBPCy(*zn|YW^%IJPfHA(_%)I22&_LnaK)wqNOqkc)4vI#|dPkyVPS+=2~SW znSm)A8JMuqU8)Ch6E-p^E%Vldjkcw1gqs*7ems%6$Dn>T+O8TD?5Ls|_u>Q}3egE= zkHGuPk$XV$(ur!6GV*Rm!b>|-UczB8yqXHU7^b`g6V4Kx%r0Y?^3q8uFYQWs$y{JK zfR}KegQOAj(nE%MW3d;bgqLF2&r1)R*fB4~DB-0T#=Io5?_|}5b#757J%Vp4kfaA9 zWCT13)K*qFZMPbWqbR~;r=)UmPr_P{s)2&hptPAzO<3!+e%5+SLEt`D4LC702yCwo1wekRqJ7_*mwDSH{1u-EhW zg6U^3gVJ)Jp0L;6l)cPV^$B~u_^<4>4j6GRViih;bhi!PTGIIZaPRJi({DfpKloc zP5Ms^*?;6S|47RaU`i7Y<#3<=EG3@U*@kJ5e zG&6=6CH*Fbalerf>0Gs06|r&8Q`_UlJwI*Sqbh0LHwU$@K`CV~P)YORur0Smm~W16 zn>PK9`N07m{XHcIOg6G%zptJW4#zn86~4QpW1u-{kGmlFYMP@4TksUq?iv#0X$ z2ZNdxmwL$i{UY>Yg`;_OtoS8Lofg;pQk7O6OKY(<@<)TJZcv){K0dhex+r- zW$OH6^Yga(`8V_Pj`{fs=O+4&c!Ci_At{uW^2ziet^6djhd*%gpUgz#TT>D#7{h)kXy~2HL_ap&B9VeI zG*XZv;AW-Hi$%aE`bEGksR;OYL!kJJ*r1fax27Th=2!+uKNt};2H=XPJAK77oDIdj z&X(et&WH9adp7>hvFF{8JM-)&g?9-{=l*C>JS zGY{`KKc6!{4}kmo?D_Tr81#D3mZhWZIZ;aAtz0Z4XBR_nagTDbj?Aa`Di;gMtlfR$ zt;mV;`+h|gwM8O4WMd*|xmZiii6$J^b7L`?ArG4ntR_=!{fH^?qvq!^^Ygg*dBPO@ zNmKC8o1%Wf{5)lTo`x3v8G8r{d}1UNea7A-`C?_6RDH%4W=^RZ!<4FD98A?1QbaCC zm7g`i#i}w~+7FMYnP@)IAU~j7tUgn&J=ibTzGMVWBG+QbL`9wSoN4G}got4($R6q! 
zWX}&4WG@U5WYm_wXdYmlnRSg}BEe(WFTr0jwZS?w=fp1?0^*#8y~ti{FSZ^KN?}R) z1F)~MnqU&R!LmAFO5V0UhbcqR3tUN7cOY8Eob!_&p=N@Y>ig zv;Zyp#$|h~?a3w^?G1zL@Coch!0*xTd$PMX@MORa)o-Ihel;t>d?i`>Fj7uOj z#A;S-&UDTdDCY4ca0G@xOXC<;?tGw;@|~=C3@Zl>mo~qCu2{!Jceco#n}vsM$=x1*QSky{k`n1{3-(Grma{qa@70d5j|n)vo@}&Gnn}fML?CR0f8?Y^nfg!w%N4rOW9QOjaE(MQA!Q)ft1ny0E9@35IW^kX8?!jT=IvnwIiqUx-*pCd{ zjp#g$SXA^t(u~fYh}Y&ipM-0yt>3TRF9GBOwWeTs*`T+-n}kKbMu=?Jk6wnSm;@TG z%%v0-GIS(EBFWTZURBI%h?SZ?Fb2e`MN;=g(&1{hN@8!@ zI+89{D!M3+X=$GFAB!_5f=>0<`Tw6$QIMuQOpbiOZX}WjmU_@G z%813uNIhm}bLYaB@vw=7EqsI&mlK&6;Ca>)BNxI563g;1OD|jmA9wJ1+zaep417Kr z>!>OJ1?J~M^K+5;!L~cE$=d>3ZKG~ON(;#EU2OXIu_=^8${>CpE$x@`yFn?xcMs=dunZSvaO25*>N=MDGzw@16}b-8~F`#1(VjM6-$BgE?DiK-mXksbA#9qklP0fO*Tj9`AH-j62L5>+kZ$c^~@YJ>0Q2!TZ>rD5k;nadKj0 z2}`$vMK5!TS-KTz=~&B^tmP1=K(vbMUqhS>Up&59s@3F}I@44Z>&(h{SgA18@>PqY zGn-JrK(#r#umQf+jq<94JhuR?;VdO|V6Iwfzm~OM#~Po&8n35+1N|Fak#?KheQ-Ck z##=~_t)#~`KG{z9M7lfZ?xcGX-Cd;C$@K3=CxprIscySB&D&s4#}>F5cCS6tp5@K< zJ27`KBk>~0I}X-GsPkVo%3ys9`t#ARVtTVmMqy(_L#y4W*BECQTz&?tDP=sHXQ`-$&34dm9dzA(@uLzq*Gj5)PTI153T-CNlk z;NX_ON}fuLcxEwUg;}Qu7FRJ^=XLfJ#2D*JWLxdmd_m&{%l{J|<@jEn?-l44S-DKH zBwq@yXKY6uE@QM3M!-!c1zmPJVvNIw3d)RIPZ`fcJdz~uj%~X5pfh)&r;vg&VMFAtP8L89dc*8`$WWQjh;0@Dxv&W&$Nekt$=@ME^k~Gsze8Nt1n1Tu<@E z1kzNx)0q5px-+b(Hh4;fb-FpMcXx#i8Yahd?=O(#R2l*DRWI)3lPqS+w*?lfstcX) z1n@=VOiDLnB}1Z_3qVGz;XK|v2i?+wg@q!E1)EEE9xSv&O0i(`S>px1oD*G04qikK zUTlR;dSBRVKMZ-kWCl{1#u@n1CP{Fya(SM6nJ=8boZG$hamT|y-8&sAtsps8s48zC z(8_9gXQirQzN`GZ@$7gjY}2jwFunS91kUj8$FnuY7=hA7(pYQRSf9jdoU|4ZTT3=@ zuk+A5(%s}&;mKwThg`8Uxji~ZMIH876?WPmMI)lKIY#I$asgQ{ z_7qyy);1~M+e&$L39_r4A(sCn;5N3|cK>PkPo#eb8E_~4C;4B(^Iat6$t2}gKHp9E z6uNupp2`$XqyKced--G^{byKVm!1`l*5kr%y;GmL3niLKGMq&+yr*1}frw=I59E6` z^WD#U4_IN3?$rm^;muim^N>tj)bHariPAR@^UZVk<`Kv)M3oPhF;Ev#T(%6w{z>fm z0@Ip*jeEb21NRh}+BylOTWl-~i7-^xHn_*|Yr8UHYxgR+4qcZT5|7V)x&dP*pDz+K z!zWpM#Msk4SMTyX=VM&C+N*OJWV6Kg;DYb%Lg2usxF0xxFE8_M*$?bTK(>RaRG5X2 zc1Wo3%mWn#63a}bihzq@Ee&6XbW7=$(Ji+I8fMXNa);~cFbB5$cDlpWzX}th25xYb zoN!Z-B+d=4QljrFW+wA@G4u63<%;31?<@CzK*k>^=JP}I^CRU}hO*e>*5K#IxThhH zeYvvjK2z~D5}kuL`0ZaQNvapyL-{PJM4MZx5l0?Kvno<_2q|4n7iP0@i4-!mmWHq6 z1+#I!1y-IibzOKWUpfq^XG#sWX1Y8eD(SL_jaH^<0wT*@6D&hBS@6xtjN3f83_~p# zo)}ZgIE|0RcXjbFu>DyDwx4mGA^ysg?*6g&SOK2oWH5pYgSfONB+HV^k|osOG$=b7*Zz zeG%e9P8k!?7D5wHsGfy2FUg$NLS3w9NOA;e=8^lCJTo^Z+{2-WF^zP24x(R@$*Py* zWJI}cCCLz^$>fq`%mD|MjkZP=uG?YRh@@rncu^u;vU34yc=sfpMJn3`AkluV(4CyVPyO8c8x{K*9p}UmsGWTJ`T<$&& z_c;1jkik}xwWczcRm|~t_X~)#nzC>WvlGHJZXsQkS0roU|y56{2N=4?`biYuf zNj0(kHjNO%$IL^J626cyk2&Ze4ZK-8d%`?h2*5mS%VO0WC%JX5M2cC0AWgC)Y0FZs zZ{@#;Qf-s(P}K&rx?cj?F6B{&vJ@C6QYfs~6c_~k0>fNvwI1V5{zbgE^E7#{Q`P35 z7bG(?2WpM*WuTKF8K4J;S|k4o;I88YLao8TlSQrJK&=se6=Az6!KtDpK84f8kzl6R z5W0u5Zj>4oNZc|tUb%UZO{Y>e&4wLc5vZrJ6UQR@G(@y0qAhan=`u=T_lxOOMC4rQ z{d+0f=fi^U>v(IQXc5&yaPHR=L^+m!6Yva{3xU~hrs#lA@n?e8`4oQyd<{(^Jb$Ju zM~TmJZ-jd`{rf2x54gAD$u7DFT`{+J$j!q&O!pkRN9dkQ_dL4i(}f}eD;78}SfnL+&8W{|srV(@%R2eh$ zmZ;a+r1Tf53^L>JvxCRsx^x`=(!Y$u&nZ;Rl7lAf5;7EFnB9kOT>9q#R z=oB`hHrJUOT2_PmngOqest>Jvn%(VQ4v3|1`N2B3%t~&;o{5{)2k@=*kAa6P)Kohc zTnJyKGtw@z!b{*amr1Ef_K@&0z*{i2M}kxBa{mgzDm5L8w!*Er=tGDA)($1DPbwKD zh3_q-kDGd*nVT)9T+etqv=}_5yv6+J-z=Wo6cLuaZwJ_4f?> zH*DYwiswJ~^*69Z8Gpa2-{+9ZLj8cc70q{iI1i~jsOHOi(17J7(T9}Gz;HuPj;TI8 zDAk7tr0R=^sa2Uc;v=kAGWmIAKtxU<9yNQL^U8JItYu+CiBS7-)9Ict=t)ES&l{qD z!TdaBi14%_!ZTp~Mb1ao6ck^+1ezOAR#{EGtOYAo&}gGf=AY{QHl~+rtW?n zz<9nig~YR*aqtCTbW4q?@K7Wy%8BcF--jD|KY)vG`_TCy@FV@2+)H~89ucv4d}}htWPgeh*55OAjMgR#iO?D6_OL+E`rH1H=08*fy-EhEil7OiAmh zM1dIP5$ma;#G(--v7Sm~tg*s+Dr06C^GR4w#m%OOo28qhn~!?&q!7)4TNEvTTTFim 
[Unrecoverable span: base85-encoded body of a GIT binary patch (line chunks carry git's `z` length prefix). The payload's `literal`/`delta` header is not present in this section, so the data cannot be decoded or verified; the raw encoded payload is omitted here as it is not human-readable.]
z9=hH`pYzZS9{Q|@ZuHOz9{PcYj`z?HJ#?IhZt~Ev9=h2>$9U*R9y;1Xw|M9%58Vn> z=c2&exC_jUd$Gs-IK&i~!xWgqe8yvb;-OD_XsL&e^w4b{`jm%m2db?Sn5#=*uC5Ut zbB92!Y98jHyFBib9{Q<=KH;IC0oA$$=IRodtLt%(`FV&bFo!8HhdI<^ei32{%wY=5 zVGi+_Uxt_hbC?2in1em$R~|aZLwASt0(0~NbMyl}=AIB!U=H(8Fh~8Er`{V<3(Qdq z%waz2G52}sBOY4jp$~iL{*bG{9M^vp=34ZSujN2ki@;nh0&|!Tdd#mqbbyC`{5JoJ7K{nkSt@X&8ObbyC`?V%5P=m8IX$V2yg=))db z=An;x=spj9)I;}r=wm=t3kb~3kwd}UyxHTa2Zq!FbJPNJn7cjZAP@b@LkEZS0(0~N zbM#+&%po57g@+Ce=>_KK1?K2K_n41+=w}}KL`W|%M=vl(|Eb4((nEK7=&+DpV2=J! zFvoMJrylNk?(onN9=hE_pYqUc9y&6tUtq5OL&04ArJnlfkXm4l`cN=O{fVc3CZraa zqZXLM{Mcg_d+1gV9Tn0G%+U+X(QomXqeD!AIZS~$%#S?g7!Td-p<_dOfjRpB3p3{b z%HTK3fLNa)sKK$(#rnvlAcnC{73d}=S1TLS8y8MEfw>7MFgIa8^iy-ZhkoFp6GD1{ zIr{$&m}}2QU-PqJ%>r{Z9}4EEH+bsjLTZ6IYJoXJwZvn-5Mm0RIQhCx+Al zbJT}|IqLU4^^4LX8{b~*p_4rHJrA86)+8`jlfWGPyB>3jhrZ*XQ$u=zIeLLP`ZXSN zT8JqyhxuDzj^k?2ak{T(m50s<>k*i%=YJc_^<|}R^-E!^1?E~UFo(IqW6t!@0&~>A1?D);_8b@aw!h|~i^F;Z=IRlcqo3t5mxP!CbC?2in6G-w zr5^fjH=nEcN;-Sxb=z4(; z)7r*(=tn@4FxT2c!CVhUd+IGAwZI(ppqy{mbe{Lk=gnc;F+ljxZZVS#EB#P`bEP;In8dUn045<75NuHr zA^9PR&xZ;?zW(Ze$$BebT5c79d_5HWV9ftXNOrUf|Jm;A2kB<=Lnx?9TbSBGp$cV~ z{49_hKTH;0t~4Y`^0%d(%fp3_{+FapeoZAz{hTBLnEE-1bMS{$=JqrHrnV#r)DNhz z>}Y36huwH@EOyp z6fWAUEI;J>k$fG`4VDUkb+oUZS;C!~AhyIddRDI@fHlqXh};6o3yzNcC)pRg#;MjL zdG-86M|M!(^pECs49@NzbVS6}V)3S0^SEB|T3WgRmS=aOP=3B+yjH)jC%;22UF^^E zy^)~dldh9(>EsgI$>QFroqS7-x4GDOT%np7iMB7_$)~V5wUZ(eVQM@1eJ|T-z6Ymm z52|$t4!@3jVDdu~NO`QSU=ls#^2|?M0}>X`X_sr55A493KKxJz!*^e_n4Zb6l-yPc z+7gc@f;tph9%y2o!i@(-!4p zNU$0|t(3D|OSksQwbLMjTRmn^B-&B0r48(rA4FqjdDQ-X#m1g;CnDYKz}n^)8JH*a zctx1oXISF4{^3kT8RgG(lnYWuS;WL|P1OopIw0B7=&_OiQd6@<*54pomhJ$a*;LsN zj?!^{49GU;abF3>^daQYt#Wz555`k_%i5~OX&}PAwxIeMykvvmxt;f2L>}m@Z;o8T zH@V1u(-q9g{GbL?6YZe?f_6=zIg+1MYpcJRUc@|(nNA`_25V+inwevsVQ4Zqz3f0} zHRY~VJ~>IGA@T9ZSSKa?MkRlkc__MsOMCgj8Pt={kEM4&26C$5tziE0823r}f*&(q z^vs86U>=V$*%$DZC>8O`ENRl_I$UJq$Jn=VQ*yKNHC&D2s^ehZ4mxL0kS#wC(~;YW zV|#H2CR_UY62)1uBC|4llypfndSWEnAk>woH*&giRW!mmZLkUg#aKIboeTu`ouoC@ z%rqWBjnmLr?7EnFq30kuW^If{&$gF2zw1etxQp2Vit{7CWgcJ0%gJ@ZiA+tk%)?QB zUWN4l%NpiGrf3?UbE(*D=F*iY=L0q?ef}Tz-a9_3D*ykV znUi~GGAV=i)k^ZWcBpAUb8x#ymH>ifLUyT2DOY6avh)=r!CvxvP- z8yajct*?(1%6H%pBrc{s1(m`BKZhc9IPgv&-mkVVQtPc}h3Oor47C?9q;PRQR_Fu6 z^^Sl+v0uu}C(|qB5_*e6>|8Hck=aNQ!<+9^04H3ltZ_T8rHI@CUjXssfmBj-^qvD+ z>F|T;MSA+wkan?pqLag&rV!Nn(xhz$x?Unll2c6T&5_N*ZD`V+GoQFIG)uQD$f^1o1Wd%Mh^+xj;h&#UGN>zN5R`Fc@Bg5BymdUM( z1E$(}TH}~k%?OtU7YR4=@$WE2w4#Hy6`^44*c9~mKN6$)?fZWuh9x=vkHq*t663#v z#OOT6=`kjb!02xYjD5gV|C6YT^#lHwP#0F6AnF1d@V|+;*!-)A3(^5>OI)n_uca*l zp+Ep3df*wj@?bFdugnbn7u1t2=oYGR1A$FB2+o7uLT#L`;RD6LZ#w7M$$uKQ@vV}_ z?|Jdl>F-| zcPsNKJYsqK^!fV?+WUej-T(6UIVYXmKb68O2d^Ju=I*^_#H_k=_x|DBOZtQyxXYf#)s2x8Q;Hb1ciycLmr(_>_hWk6>KEBoDAVnb9`t|`R51BKnNG5&w<{l zof8@?Y1{{j9M5xVsE8mZ(f8~Ji{^y(_r7>2NG3tDI@sdB&khauiWdYS&tk=IHWoka z6&Iqa$b?Xdx!A+1qjXZ)Q;lWI8p{?omMsrfk_XpTe4?@NwZ@8nYAkylMm?w`di_B! 
z0rMfO1fyAP2YHN!uF`&XEH z`}3a6q57*C@BQ)mdHYwMJf++H<4<~GYC6MBEWE=|c*BYjN7i4n=!NPHU)(j~#CvW& zqlm&avyS<$U)p;GWX`^ zzh8IG?mu33YX=Ieu6p3HrJMKou=ury&VKUo(l>rTh{EL`-(B=W_gxOT?6&jfEIj<0 z-EUFhuw{K&xm=3tZiWW!)1hAA zn89eA&8K8h0QjFoAm&*9O*P^6$a7KTHJ5H00u?}3nDioL^wrj66p_3_(Ul&tt-c*V zC{$#AD~)9H$}S$doJA)qnaR)cCMnX%E7`kms5bJK=r^ zDU!^%oEej)XJ^5igcX2vh+_Uptv>|Gfx`-4Lo-A^)*9E%6WV5KGi(7ZT3-)%Sokg8y@fZZk zJSR##Jd>`lZWwV8AAhMAB6q9T{~w@OlB{Aj{r_a+j_nlcr2n~!J7%+)Mn-yF{2ok_ zq>FsI`0iJAajEU1N>d!=b#Zd?F1pag=3mvt)p1=oHrD4>MKSawT_Nz)`hESwP#ARD zDpKtR*Btj>b~A<$e6K}<*rCKJwzr~}Tk`!tUHply;I^{$ENPjdVV=LAKTPuY*NU?plSEHa|2*yD%5R=PjI6Y`JXr7?R2>65Idb6|u|sG?~mvD_3zG|Vt?v@$l#t0;i{i<(_g zQvN5&ze>#FQrT)JznFH(-?dpQr##-G_hN5TvKi&g!6HW3(LBT$#_BHDvIWwG2=1v6e(kHPLVNI=`rBF*Z1f}mJw>d1QovUH4H3#}zvIadl zGd-RBa!Y84*^+7#I&~+h-7>!1mfEHFHf6S{w{6=kQO*Kjv(8qrjj$jCF&HdoSsoyW zsAE13K$PE$UNA=rv71;ME#KMA$z`uMy?BJ&X10x&IF+^yKFl^fl^F-M9zy?2R6`4# zoDp2<&^#yt%FG59*Jzdh9xM&H07W;14&qxgc9iE!J8^fIsq}d{_kJ?$%bPb&bLPz% zyYig00dO9Pg-Z)5eDufT-o5tIp3C08`sYWoJKa1WDJQq%d{0uQ?SQUyw6(tTZx4QM z`NMBK_O~a`dHKnUlX6{28NsAXo8adlAAWMhHbtVuhb1QW1SgGTLrXZn9TX%s%$}Az z-AY*2e6u^o073LUW3apco{^uRMGDqa7BLMdf+bM74Nwg_1!g19DSX~jaM*+;_Q+4UBxBE@bh~v zy{Xv~1u@B2(M(daSwR6crB~}h%@S%Bp|m6-s8mlQn-}zT30F+bOh(a6oA)JZ+iuWXWCEm88yvz8DXb0fSwBZE~kpiRZw5*2&C<1De0Zk~ufF{}7CIgxl-vU3}KQf>R(Yy?3B=eF1jbjaHL^U^A&~SZt z7Bmi)AW1A}?E6Lw8i!Tdjs=bGBB{63Ar>@hQWi9Nv=%hY3sy0}CJP!nYFW^T*H2`YJfIY1K|>2ISkQ!6&>gj)iLlI#7BnGkmMmy=BgldV zkz1k#ji=tyXR@CO0pYTr*{*tv%AWCzc!Ngc8T#;yXF^TJGsYUv=*DQ-cm~p^x$z9u zA*Y&*XT)@59pU52B*rs>85z%nwre~ibeCW}vlAff*?TnYS!>2K>Mg-|M$6K=@r)P= zHZ8e&%fd=4wlnr~YdfdbTs1)I{4E0n_?f3F<9x zE#uT%Ekb|dQd;oeG1Hlq5KSTx$4Q93BkHYCr_NM8_?FDslfL?U_17a)S3lIf=cwC; z@_Aw*Kz8=gK9F^hV51OmT#c@)0(hTEPH>B321J(7{ZhSjx*`Tqz%2I^ClKo2&iml# z8XbZGeBf5t7k%0}S@^f~rY7`44@hCB+6jK_m_d$nWvC0YSl`xH*R(|TnwZM&L5Nsi zZI^X;JLdK2thxK8JaSjvMfv}HeA>*k3lA)2UWtY0u>!%v{`ykoxi7rIu1`KU zv`tbvGb!!*b!i|FI)3^S&wsMy#_x~(s%qh(W&Ot-Ka4hCIq9qg$8Gp>%Z&>s9{XIs z&hKRop>WnMv-jS3+PNj8&z*Ar`@bzZJE>x1$Eoq83Wua5`i|4sN$GZW|Bmtg?nB;m ziheD$JfQ}p!iFjvL(u7JZkq#1w*@>cQ@Qb_v6l*B@4YNM44@#(oI9EdSy(ud#vr_x z)9E@4r{~njDGQ4`ehSgXwYx~Bv5Qc8_@#?%gOoZsgV5$6{)UrsCI zxre#l#rhDE&ip(L0(v4W!7?Xfi_>`w>00LC4Th-}Y+bI+U;yDcSC)4Tx_jS7e_}@P z;UtYnLSN}4){wOw=~qtF9A0Bh<=SfAVB=jF-#s+Qm+$grr@@Xx0_@s3#E&B{K>#bp z_J_a?<0)3;3`VWuVD(z$bgz^qhdbg!QawrQ*1{t}-Z9@O8N4-?DDMW0XJ0ATQrsj+~-1fI)Lx(cPrd|GXd` zf2p=Un=+}&Z@$P|?4fEW&wLh;VJ9hl{ca|M?z<`-i9@~&-ej3V|H{QkfFv-hUjzDb3?bhQgH`6SM12k8vewmMf`DwClI; z;Y#>8+q@dAltq7)jNLE6ycPRvxY5DbjfxxDY&(KbS5x9f)bBkMH$8_a5a|! zoRe&|4EGxTm%M2#J6^@_#mJbVD5rE>!>H{zA$K@A6D;=0brB5kHBfLi_6^ER0hA+2 z)+c9{x#kGuY*9WNq@n!Zp#Pg!Iv4F$(CMhmLFMKbj{gP026mC%`dy8aVUDv-Dc(NG zEu=5l3t^8kmWHcRAiV2<7)fr&@-oLtVlrZ&CcU{2YHbBf&EQPZ7vQX(;vEFCc?m!! 
zVRX$LK@_=#xSJ-(44%v5iaC)w0E+G`-I?a`*PhW7Hpf_G>m=Ph8>GmgX~tgj(n(#1 z037-~Rj76|2f^AtSD35jR8ieB2OxakvOb}bri(GLz3X(gIC34iuCVIinB-zD-Ra$ciJ_)xt>ffYcI_MGR<;w#t^7{n%lxg3X)sVLJM-MR2Fnc-4VGJl|8vtYP{TvK0LWqYLnawfUu34?Z~bCtEtK9 z3nILk+^QL#Od_`u%t&sPx?Q=I&|QMuY9~P06Sg$%S!;4D^_C#F(z5J`+{%Jfa=Dd- zl~&|d_Val9Appm zX~BQTd^J0s(1^Ft4=*h}Z_`dPQHUTypOq&sImS$$&e6O;uL3zM4VzKo@J zaM!msRNppw$~X5s|MA^-o?3rpQk`c~Y0#rcn1W}YQ2p}@ceS~HaQc!7SFHIwlGG+O zsrl(?va;>}*Mgb#;~t%Ft}3}^>#)-9Nv&5OL97*e=#lT|-F?MV)fZI4Kwbbc5rBy5~51 ztZ{NfJxFnSfwS9MCwl~R+d(jh2t9T~>4~LecBo&DQelRvIM;C^gPh#0j`OfnvQ@Q9 zBIh~Tq?e0yQ-!XP#ZE5lz4JH6taS?3IOgHCB#Z2?gpqJ|0Mi>}Q={`SPVfc@O_}N; zT7d*BoCK#|4ks8SpYF3>;#2eRoXylNWG=4;5W*l*QaZ>SrxQ}j$;UZ;kK^lF_n2dk zUF?LPMgIsJkB=WdcQpEVu>9CNCLF`R#Y>zlNKv;A2}7~ddxKh@UdeTWEYG@^}m>M5joJlWhlQWI1AnMOM+(fKU%5r8Fl&^qK;q z22F5Hb&@$5)2Wu1NL9PXl23a%%DvtBcy{CV+$<;N ziSg`)LL2?JOP8h4uO&|4QLmNv)Ji0WD&PnawRXX$dN0?+y?oHVlq11K-pjY*UOt-m ziM`=FZf@XrR+>wwzCafTaG)$*^~d(ajXeL4M_4k_M`>YYvr+&g@ZyTq)Ee+cB~Lh~#2z zAmdw(NhUcp5S`*aJsdU>5sfz1CYWubM2Zp9OBjJJ%tDSgqNEi8ZS!zx2w`_+ICo@X~}0P z2X0tW`{jdVQju+5z-5R2A_(Qn;Jlr7XqF7}!LT{o-U#DmK-JWfISTkN11il?98U0f zNu}4PmC#~GADj@%WJ9LokD+7Ny(MBJ(FZJjFHEKak7nJw%f(v{&20GYvULX^IN;Ol z!E8ukVNxwiQt~0`*6p}DhXHNtCKJx4aHX5SzGp_khNFM^Q`l5gI@uF!K?m(rz?{MsSg)m)t4ywPhD7JWYH-}p6qGpw zwLznueb*4NqRc7fi39JhR{R-fsC|mMYn)w2Tctbi_I1}<`E72Q457MKr~$r;y2P9~ z$eg{3)5&#SjBz~3!^+C^vKUyAtA7aq%kgwgpCYUk+mMHqZZK=U6Wk3L@5^lo(%7pC z_K>>4zCjLzVW5RO+E9Y3ZB!W7?EGg?>4Qq|`h9x5n20pZAxm^*Gt7G@Y+njv$wp$UWHip>~(dIWjp+LtGX*JT7L9 z7B{H{61>|AWSmx#!-rWYwB*h}NSsBU^nz?YBuuEcCrSz z&gM8apIdJTh+Nm?f~%wEW+Fq`0qX-FXT%SfU^bRqh60A(>wwh!IA(%~+A7gT3!|9P zvSn+Uv(QeGCXhS-6=kb*0OkCW5jNz>VS+ddu2O*!bSWvgRy7CLdM$?p+O+m4f_C3# z4XULv>Jx(ihe;9E3}{tX;zVhpctwf7{h-Bmoose7EeKY3e*+|cupB6~h7?Nv3Y*{W>`aYr8T)8Mo zWg!bZtsX>Ki;LH=y~WchMmqem0-iK3UT9QP(2i!y*WfRo+y}ILkO1C#O6ndv^o!!o zyj22QOzlk~YlHoAV(3Mt;4Nr!t$KHK&P5C=ympr`qWNG_eg`(aGAxmJRc zsU9XeY=dqCQItSFS^-RAqv*DG6!xzK_Tf+RxpnY+;6lpN_m}SQ_t)}$rwP!k<<1a= z$B8>bYdv?GuhrmBih{Odr*U&2f-}!m=ukaC5cX~L80`$!=l)K1fLg@*u>%Crw-vLE zJ`9G z3k1wrY}w={(!okRMce3(=OOmDmpeX2jz9!w6Zyyd0aJ|s6%>SN8!5F+?_6RZi26Qp zN2%{Nn~;YRTKZzoOHu9t{jb#}7SQN1g z6al)eCpcaa`6eFEkE%)5BH>7F}I=2_FQ z%dsnMzi&wR9!CMsw2HemU|TY;7;Ry|yf_m}F+Pe!BTuvCE%>TUjd z1ldh+f)kNSvK{TB<}A66`7~L%SGiQT<6dSeCQOJR4H3=5_)2cuZ|_Q+#fj_2j3D~} zNW5L!areq{4{6S3V)G@q-?4_SxouWyHG};iBwMS1xqlWkO#5gC@+;ZTU58Ym2kJ`9 z*#afntZ7()HNNSM($0I+9@v&`SemR^tGJp6(}+2@#-1g5X#>S|AyRLK_HeLro+fMV zoa32ytw#{mfe0pla|({}446Deo(^2o=?e6Qr5QiN?pV6E{A{?wypFLHBDT*Ep+ibs zhj|=UeT!vA=9uYt%3zJyz|ZVb#GbpDeJbE}5v+7}4Nf*uxuFprJ{#yVM3K*(gwJH9 zSY)10#md&SwG?fI+I$;J!E_f3$1WUsvjEXpFUlkeUodACO9p8E5-6QWH^pXDnbY~- z>A>y)UC`Jzot%&TAfhF}$lMLBE%<*A|3fWgS)71b7{Kv&o~@>(nWEh}uMvXsMk=Cc)0n_f8~_hzsR17xQdiO$`2QwPGJ zbdbbhCw8`!owe;J?yP(Y+1TJ@$2pl^k3y_zq8vr+<_N#o{1n<%Y2$feVu7>poXhz}&Kz)8I3>~Ujzv_+p%CT-VS8{ng-x-iF1?lt=IjC| zvW$kjDpAwM14;#oXjJlFXygxH&vyy@e=Spm5s zI_xwF5|b$(l9|<;{SbNv?mlhlT}!_>>Vo5(i@$h0rNe;}WZ0fqI2x61=(4kNUeW;T+oy&KWtZaJ)o~VJtfA{9BGaelOz;B-DdEx6X>{B^j64b=P z?0gCnV|bme5MYV%e-0$b(l+sbK2h|c#JFIKaLwH&1lZGZMM#Vk)SwMYjFL32Cxt)X zF>t^aB}Lx_PF;EC#-mav{7!L-5@R&YQv|WZ7)=W__{1Pf1GRLAtgrk>V8-Lwi!ytB zHYrfMe70OB65|xzC((C!Rzt>F>w2yK=BaZ=7Tx%}udf#rB>GXX;aRu%P>yk9NVjZM z1X3B7tsYHO5|J`9S;rM*?CyqwNj^Tvxd^tym4Lr3w!rer4?1J14s1vwfovvS4w(6- zie4$HY$|dX&c2XxVa)67t z6x$Ml224m!mkbcSKy)lgS6s7aDTiNow&^D8Zw&qKv)+nHhn#L?=xGMbGqg<;AOK8XWJb53TZ0(-oK7>$;ZSIt z%_s)g@gp#qM2NojSC4`HjUt=BmDnOF(%&^L$)><&Z#ai&I72F&!ZR4fQt5-552G7- zlz1$Zl{ttj+e!YMF@O;W5F z`7PSc>bx3gwslUT7*GC{Tg-7)9xzv7d&@0cARFZN<}KtzyrEP&)TCn#nvM48IXLv# 
zA(;ZJ-BF|)SSobf_UICK!_FZx(EeXd^S#Vnc(XfrzH`o8^A6Ww(=>;O29F}1it^)Y zZoFeStNWV!2u>~H-8Bo*!&=i}S@Qd;S6X=;#-u&F-LA|T00ArS7*6=(tbP0m=W5dl zvc5Cs&oBg0caeC5-^8;9exu#d=h{Zu&3>HiTyB_D@B`XibD`62qx{)8kMBT-v%N&B z$c9Sl(O$!inj1nwV8;A&R;9B`F=gpy5mOKs9h5)DD@5K5a(V}Hy{^M(BS%kx8@aeJ zhJk7fxTo1u4)>-gi$P+wp{TrO+xIp8`z_vk!}o#jwyk!)Uk&pUD`Bzup9=2uvF-o_ zz=VR7?yI9amj5x1hk*&-hWZ^GiPB{FDoUlDVX>-UL_J1IydkS4Yt{jF3-AR>6*-D2 zL?%v7vZAQKn4}nwpEA!+=wA4TLWsfkR)bbavnE6B780Jlj?cPcz*tKNgDL_7R3rU{ zp%7)nbN5oGe`QJ=LFu$g+J*YNguO83O6`?Zl}$3v%xE@V#DTvpOCha*bRxV{eIw4|Bbs%;!+^Qx$IV;=F|Fu4ZOs|ZuMyycC4WR5V9$4AURgv>u={*Afwc%naDN0i|JGcw=eM(mQGQC@*k)Z1vGb~sfhwH z4YN@v-V0|4$TaYyoc}>zZe$BAVYxF1>8&^l3JPsOK6WrnDekYT^r1?)R8dF!2Wi08 zFQ$I2laCWy{NH+-#P%&eRGh>68lJAE1C0q5sFr;j3i>VML4pS2nBYf(03e)`D)=G zrra*>O|z@47jH^b`|^OlFAuOU5H&Tyh-`Dh<}woCcvqMYgjr6vp|Z-;Wrno>Qs5-5 zm{?8S%s2A5>&0LI0i+fkB}Z>({!qoFMV5F+i3|o#bg{tBU?rv-NCP*lpm)>$b9Mta zZJio)4e?OalR2(dZN3mFiyQER{(%2RN{Sv)Gd_a3%6U%eMxC>_X6Xbm>s`Eac&DkM z69>4eWLW3m&t6ob-ONN)9%x<(BCRpcmKG0&u!U zmy5k{@=;m2RQ#yIL*rMlota*vQG)u97IFb`YbBLB1f)>{AzzE_|hcYR{%aWmKcC9joW|# zd}6y|0r(&^ToVhxCo-buPd@g)oa<_6w3;;$as}?V-=L@f%DcFjM5O%U7~;gIz?Uq9AqIGWuTv4A4woge^UbzH&C#72|z}6gQ$5I92~?df~WVNhU06 zMkb^}xK}yh4NkjAPRPat)bg)?(qELHS(L#Nhj#;%`-!6f(Eh>7i{%0(ApV~vuU)0d zi6I5{BKw5bInvkM8TXHMw8GiwvvQRY_ zOap0xtNSzP!+f$UJ1N;!;FB3`hah|vz6C{#y?2nnBaI*+USyGQ$wi`^VwKwzJMGAW zMg4#oEs_TWfDKk0`So!iXrtF#4t0HObnhdTMyN}04(OKouv(B*8~9F-w<1!CB3rSM z&(px!^5E8aCh|G>Mv>3;MC5Zl5&1kCANd@r%i1r+s4gO(3o&}ZEyd10h{alZ2^zv; z6s^Qgh-iWLL&HX9D0t?`2*DcKMK2C1?-YlWO86tMl8p~0>aZk0NI0@af-)JCqED2f zV=A~(W9o(=(FWD@`weAjdz7NNWU;?JYwQ9j-&v`-kNNo%mamp3?1h=z?u5NS@7{E4 z`OG6SZJ_t~>DIt^X}J!UuWU<;M-4CdrQZz0vB58yw*mMnAve(sG4FSf5viGhAQl^N zHK8jfJeWnrNX~8*cG@z``)+ofrvT9kG~NS>aLTN`2&eyvZPxwMw9Q_GQ;jti;k5Z% z%XBKjso3vWgj36YE5fN*{>BKW@b@;tsoOllDQkmDjk`wdt0J6=Yi*2hs%WlROZ?rw z&>lwl+4$YXar<;f3=zl=ga{A$hxBHv6>E}$hoN6SgFg?C0V8(Z0`-EDSJLP);71WN zaOmiKVa{PF>=p5#y&_5-V$DY-VCGzQjENt^#0GgD_}fDy6<7DQpcAf+)w4L@9-GaE zpCERfuW*EqEc0@-X-Xwa{V6@kl-fntSe$Y>GS@hvJ>K$?PC$z+feRI9f*m5i%VPgC z)rk^sq(MJr5DY@|@^zl@`tvE0&E zz1@STuXqFV{FVvUxC&zMS+S8)&%KC5RF4pS6?*4Sl`EG(yosarR_w2m1z1KB$pwmI z?rAf9kuuPlj{}j~CZ4eos1rij0@kh#WxJE;tBgao&3T5bQz%}EvXcEb4gZT&PKkv{HET(Aa7i^*W;ksorW#4A?BfKif7U+%nAv7WC%Z5R zP}->)sZQHP z=on-|Q%Z20A_=?K>A-U)I>_sQ15YAK;2rY@MVs=j=!p;)rA{~Qq8eg9SwEZif!RQ0 zFB$`I_0m0yuj|o5KdV3`1!j{fxR0PPpG1W^L`J%7FpQ{9fnX&Thv`@6WZ%hgtU!>| zU;mr8XGrdD9!aTmy55ascm_S|Rs^2yYZO}wA@$u1 z@Y|(~oGE={4Ls7#<0evJ<>_zQ7~kE@)tJBb=J@7HlY^n;=9zLltqC=tc#t2$W^fZz zD%6y)1h5)02@*4;KP)moqH_S$E|kzd{TT2e>=#^xY;6#zEVCFQV7{dmW1UOT5u(FT zXfmY&5cVl{kdvXACc~(~h%>{6n)wkb+(oiUKe#bE<}?=0bihsn9d>(N`*Ki&`kCyc zPI7WQ+2{8Jo+1Q1JdtpzYP(?vEB#xC5Woh;u7FZMhLXZAaXUD zF?Li+$*Bc=2f4c_thP{OL6~NfqY+?*vSa-#yz#H^J_z;4?O;yEp(SdvP$b58ORCDm zX*c-Owx&e?ky*1EhY8BCx6-m@nxBZA)^@RVEsbE@2ps1uw-tW0i=PP`;H#sR%3egk zv535y)66waBzw}BG1X3c#U4grs~Z4O;H%T@?YCVXJB~HBk!_a43oC5s%~s_k9S|)2 zN74b&YvdZmxDlV3?B8;55S%UVJjpIMvCy%I-U}Oht<+!Ey&m zI}-M93vgL#E~HBY4nOgI-nmXHcBR!^`Fd=F7`y#)CwsM%`iRqRv1-eU*9UU*uUSP1 z8PI;Tacj)?=m3O?z9CJtxLu7OS$iPz31|RCn91DC4>K8uX7e}YPZwf?DqRq)b5g7A zi~?pOPS6%7m1qSr&1VYJ_FLuSkiC-(mx5#*FehV%lD*UIm^KV&iG1Xz%L`S=zU0?5 zlw)!#obHREk%&kIvH~34F{mF1~f9R8~=h z!XuRldtjosJDrblIAI=fR^oJ?^Xr}n0vM2QC+1Mt&BpG z{{%aOBawLn>6#3p18#5bBXfkpE0)qh0cB_?3pfVN{+jDp@&fZ2Tfz(0ejxDv@l4A< ze-;Xj;P4kKFoGAi5PLjAKYAKnMfh!`E8n1VCyM*iV>8g#yva=+G2hQn0&yfm++K|~ zv7TC8VJ|&xx~OnzQo7;JN8ESsWUQGX^p#~EOhHk?c|_=%#xcz!iU1i_%#;cQ7KRY^ zky;66?!fGxGZ^@e(ImAROZ1(Mo{Lpc;Do0OcX@N;?A?1!xw%hi=%Ajj$*L%^FsYf< zrM;=#r+yRGr23hug8EPfe$ZYiDL&RNaSfCi` 
zq2A}l%QBzCtjT(dMRCpPLe1$WpFvIX1{H*KwlYzmedX)^3V#Hr)rFCyAdG*~Uiw5v z3&5$}KsS)l|0ENZfkE0z4|o%qJGg7s2P^r%mj7$H-tobzafYzop3TmD?z013NMCYt zq+oFWEbWw{|I__1an0pq+GjU^uPqO^~2hhw~plz0) zTbt#t<)xNib{JMJxD;$uYO)Z5nsaIqPNAJl(FOWC0XzGd_h*$e%(B=d>E#Ws>3*&u z$3rRNW0~I}2=Kc62W&y96@;|GWzoLGE*sCTu`DOEsCBHCkqmWWKu^S=j7wz%Z3Im> z*-sF=g^(Op@P9&;WEsikk|oVf0(eNh83PZ&R}*;9<};zX(4ZVEegHtuUg-nKHx@uZ zX_?nBI_pi|wu+{=*vrwlr%S2~Q=e|$3yIdi#1Az;VWmi}Vm`&SIEY>Q&FEtUo%TujxUh|mKqXgXobD$X_cu3wPfGLuRhCaz70Vn_czaBc%mT0#3#sS z{Zi3VV^!QaaxH{SIdYmYXr{_y|MR4Po67c~|4~kz!{y7bw&3>86XJ$xuAUv*O^e>a zI@^AVBW`Rk7*YtgT!RK6)Co*2UiB@1!EQua(S2n5fE4o!aI)Xol{IM)3$XbsZY&9Z?HA&o5N%|&1?UBX^C z3jlYVJ}avt6|3M?rgiaW55l<#^~KJfs9)_YBLx5ADJQShn5E`E^H5wG}bqD$gp)K z_-EdqFB5tuU^gzErL2UEv_QLyH3t;cm!T^2<19>g=E`CyNFrk3 zG!S{i$%Pms@{!lZN}#->IS7Rs*amk2s4!~`S&I9X82)h*NCYW_z`xKJ#gtj!4MccP zAl((_z8YCcbJX1jOB;KsjpFn09vb5c!85^+jbG==zbg!{cig${iU+tgbXE?>_-1J#s1{{$)N-#u}sfk#yz&AdJb}`5o zDN3%jKK2cnhFIusQ8SktQGy2xtUqzT<5xx28Qo}I%;U4<_<_v4o2jg$n#@H_j#sD3 zh*`}v-_GQX=iHWFiXBh6`GlG7f)Jw;M?x9sm=8dD+U;M!2k29j*otuua3f3UEzK;0 zU3RLMQ67l^f3oDa*nv*xZ@$#x_0hAkPW2ooGRpfnp4_xu~ zzP09e_TtmGT=KYp(P=lrdFet5W8nux{hfVq?^@ibFM|)S4h3gtsf+@xsK)E6;;5ZoX*1K zl$>1WN$_=EvflO2*e1?cR~S2q`L;{ET1*AHjbUM-%Z}z}#Cu)X^!5sBB{PgPR2{i) zCE*$Bi9E`NVJquS!d!HfUSsX2NLc7T>#11@aW;t8KwT-NU$e3|04bfh`1-q{qKw!p zNi}IxZ8`3x=1W}EBU?Njjpp+Fd;VM&V!_rc3it0Q=(20B$79Z00mLSzj;1jhS16A( z(-k510T{%~WHTw$K{^QPfXgo|-gL~9q1tA%r|CR%w94McO1fJ!!dhqnjB-V&jN{dw z+d-0t3YtRXECm#Dfq^ap9gasIZwZ9i9|DJ3^$|QYhHgtkoXMW12q$~_sJ1bqQ3{QY zEeb9dsbm-nL)g49i`}EU=i$10lAuIBl;;zPpKBAAc8eJk)7@ybHm&zptBTb^%J|Pf z}t~|Gh;G)(%{B-WcdUVMCp&d7*w+Ca6tTL0vG=c%rN!)j9F}?9hyLh4xAvI0 z>pe23%0i)T@~4S8!-94O900|5eAY4^YzC1Uav~PwMtLB_jj|$kqrlCW-B)Q*#Cnm^ z0+bF870`42-L5&|D{N8rJMhs?ug`&2EX?Joc!j+LdV%cW2F(UeLN9YGixR7056yF! zWqT->{WkkA63UghACO46PKENY^ELZ(DVs_Tzp-~JRId3NMWsd;zZnbYCJo0|HbcS# zw^m+C+(`8@O3No6&mt$Qnxj+}JH+xnkC^RV9hwVY+sPohMZm>mZ4gf+vU9MsPO*0z zq=u$z+k4Dn@>26JuC@%b8mcDWk~>|QVSwz@3dUiZunxKxLoHE&e;=N!=GstXkq68I zhpdf0IAFg4N{X7_W5)(c;%#J8XYfP!9BoH{g74 zdOtVczFKV`z?fwFyxX0f6WQeLTq8A9YpEgF?9E)5Fh5q-$9Bd88^`=N3;^8MgEhRs z0-v|XInc=)g*nWD*6?h&M+AHq5}EHoO^i_6c4*BmO0a4C&ui!G!kIep_M&hUO{=!tP%}$(1=pmCh)|Y=Zaw>+E{iXo!U@i7u%Qq+X8#F%5D- z$8N6Z9=;`mjQIm0Um5TQ@UJ#iRH#}3=swYkf6rZ&0$)wYYHy1%!J}Y^xu4V5%k0P3 zn2S>JbqoFw5SPNDm_w)&HunV^fZMi!ZPyqM-t>vTT*HKd$6z?58`T0ABK78oMmOPT zxo0&)G?vbTuM?R}(${c_(-)hLfn4#R(Nt}6nmGoXbRL`i65=b1Z7nJ;D0lk52Cb}` z5u{*GfvA~hsaB|f94&ai3uFJn{$iW-a?_y4{nH<`e2!BQ5#sVxgz^BPmW<}Ht8g;v zpfZZtq%?D@L6==k%T;c*kh|GBQ>eEbF8@RJikY?7R^s8j&8}~pyo4w|2iPwiQ?}QxdCC$ z7=$?Rs&3>o3KzgzW)5d{oEan@7b^?t+lL#31g&3q_bD ziDA@%9gmen54?0xB{b6>qE2sj@*{ubrI^8U`HgLW{f!!8?|L2Q7uj1E6u&55NOIW^VpmUvl6g1WLwEloR1tcNl%!J^PemQPGuZb9FehTUcksCBjREfw<;N0 zgT^oGQH>u0h`WzjBbbnMLE>uhbrACIVh7bq)9_f~d7gQnEG0QL*1kxxfWn|N%A8f} zjAjU#a1HrPqyVG!Ujg! 
z%;)2vj1e9%e+kpL4W@WBcCI@WJdXIL)CK-Fz=JcGdt`b34!^nA9|S#Rn~4obPyvTh z2wR;FXwn*H1A%e5v0~nc_BH>g?1+3GznNikGhLn{b|-4?!cYlOxcQ@9 zHV6MVs0c;q?q}0ZXd9Qqq)OHM6#4=^FT?jzb~sPZqB&Gd6hdlTk|GazSuA1UU9Lo9UyhZz3VQwP~YY=wmr z7FqltHH%D3y(9dfd=_I_#a(DPE;9Sicv|A>BwFGYA}6(^CAM25EwRhIThbEaalp8` zm^WwOZ`Q_bpfV)P113@sk~#X}M5d71c*n2gn+U`a==S@GkDYt^q_=O(; zvt`TFq}P1!U8yMZrFq8GLnDVDdivOl)?7E~KH{EFy648VlP9AqzIMvp>+UdvRz>nC1!B!jNYVn6 zfdFFJXUaD`AS(oHMAy@X_gU^}3T)c5X-H3Utk3;H1k!iS2;?p8BVPfPs(%dCz*A_d7fOxLj~g-t(0(VZlY5A{dTKol;#5Hp zYAc`2uXBb(=Gr=In$>V{(RyP* ziq=|PBCUFLt6Hy1yfUxu&sGzW2oKH1J2zJMU#-_gcH^~+qOY~_UKXpnwe`A}$Lh+m zx0Us+iPinG^}5%_>VDRG-5X+cKWV-0EwQ>ATCaP1tgiIK+s>`oIlePi_r0Wbo9)}( zvAXZHUY9Tg-oCACz3%<7x@%gmi_59k?yA=7*2U@)5irR*G@Ba^ZeF`^QLWVl_j+~T zXua;AVs&3jTDRF=kfPaZw;s!Kf8V%!rYKi?Z%^>Zy%2{ipNW0&O4A2%2)kL`C9%55 zVPcK9RF~v1-ayD&*;?IaV|8C>z3$$AvwOAL>H&VuJzK4Ls9%%NuiJupvlTepuQ{mI znn(CG2PUi8Y|AG2HG3zk*{pXsb$eg-Xtm}Ee$8&J*8H7cvvabV&92H*{F)t;)oeD- ziGI!Yt=9a#Uo)@OnrHYmb6Tx=j$bpg)tZ%l&9qi)PWEf2wpw$FS94}-kTl(^H!W6g zdxtb~fwtWmg-IaWjG~HP_{fE-cXq7F7LI1*VhcpmTboMcQd@6oYxS^z_rP*;t2NX8 zn&-7zvyET#>{e@L`!&yOwPv1Q^Y^XRL}zK|KC#uBo&1_7w_3BSU-QIPYZm%7f75Es zo_@_^Tdmp0uUXz|&3=B(qg$;x(64z!t2K-Ln&Vonxr<-(;8tty>eno5wdU@A&4XI4 zIoz+gU#m6u@@wwhYE8H~515`KY*;JT%L}o(&$eFo#aP{?t=D}yR(EmhbzhCueY*9! z%VTw)+@+}E7+3#{-euRN52sD`h{wei2+z}J1Ehky^1 z@eU7PYey{MHxTQ7pS7e_)luk}DE-dDDqiVxS|^z(l9;b+(#h_%+f z4MZ#$Q5vh$ix>U9I=#I*RyQQY7F<0N-4%h_P?+Hn&oHh_F9_}!6n&C_m~xnV#>l6A z;s$trY&wJ=a8fXtQxq@bjtH4pdo{5dUr?hr+7!RNr!Z9X+9O(W(~O^wRo_DO)aY_2 z1%WYfelwYD25n<~qIstya%u1WLU>LuriE2?XZcI}BcGPh(`f4VC~`GuH?E6Ov3hS& z50{gZF&G`KCEFcG6*WJxX;~+tPiela0A`QHv{aKgB*ju*OfW-+J%HG4l9LEKDjZOe$4ODk}s?*T*I> zK;bRqsOIG#D|Yf%Bc`h~#}%R0Acdh64y71I&bRj7=)N39&d2rYK4eN`iQZW4v|Ej; zjKG;?k`21|a|qMkbqp%H$Y2cA@bD!Sib6}~ku?odakC#*MiSMb-Wo;QiU~k2%)*zg z8Y&Oym*H74mfrL$joxZ9{?y5v!V26Ju^N3dI-E!+#l7rcHH^xaN@~l_28Cc`jeJfec1}7aolN!VI z#0pUd*ChWw&j#V+TkCy^e&7LY*ZR>XPRPXwRg18c9u&6~D6hUjd(?9oTAs}s*yRL; zljUEb4KjR?bzqfG1sN>Oa5M?5(F2$cV+Qw10RK-mK42HEi}&n55|$WCth^}xTY!Wd zLG4ghwJ;QBEA3kRPyho=!<@_~W252q*YdRtXu}%VdaFT=?Xz|tHD&zfqgiER`RqHn z>|7!r64(3)f*CATvlkk@_!Ju;6)4U_#{`{_iF13Mz0ux9nj8uB%(o|`qkKpOy znf(}rqK6d_42L{nIe?kEw^k6}1<-4%FRaKst*$Dl=YjRHj*9{Rh{>r$Hf+w5@RnHc z_G&<=E2VUjjxbmF_c}tAb`2T=Tm=`-;ihWJVIe)9xyeY?6hYNQA~0T7A#J~u=L*lu z^WU638zHea8GwtNeF!xGL?HjQrj&8p(X=X==r3Gy#Uafn;0A7x!}J4bbnigqTb!xb<~F!Q4&)$332+qL%U+{*6B3;@--*n( zAIOdH;g|kee%1R=j6Z9?YnL6H=2X9orAIKaaK|xsQkOqr?oR&gmS@d>{~6pYWdl2h zH`ON0prq5k9)B5g35lImY(7Q-Dhli-phg zXf~>nu48zz7U9|Ml?jy%|GZrB|F#!vq!mKBXj5>aLreZBrhol)s$7zkT zf1>vQ-^sbQ(CMl$qb*G1N)5Hf8)`+YH-AESKQSSR&8|``$(F|Kq#kNN{192Uw-Q)Z z?CNBopZ_NzY_ULTKIEWH>6|_=Hr^nH6an)y_IT|eZiy>kPPP|A7lqS9=Pj$fK~_Cw z%Jd3AEC3FdQ$5dCKQ_Xu_pRuHdR(NJ{UjZ=6F%#4e+$qS30)BcwmMeJFT*MX?QE>M z!=)}W#dVfUK@HX^)_W<@lhpezSQvB#SrZS!o$Ca?<;#(FAO}Rs%rI;(%gr#<$?8Ww zm7V;KDIhc{1zbz0sK^Q3__q^r!)&%v{I2nqIiSJ}t1<@=Tae~xNWPgw39WIWY%pd! 
zvA>-l9MVI(}SQYP`ld0{(ER+=>0*0Nt z_qQ9p`rya`j*`LkxA2 z(}6$FRaR0hygZbt>V#zkv@!tft(Xhw98{QV$Ql0y%ZaH2 z=08%o%GHLaoAGW%?duNY0TT)B00C;r#mSntfyPr?ko8v*=Vc2+foSNC>Nq-?>fY(# z$a&ura4G!&{pJgJvk!3~nlCI5M}oOpwk8Y9jIuv`-1uyMjW7PJZN@3KqvkH zQ)w&!841KaaJPR2jUJ#2t&C)q#+Xrnf*`(w!EQqZ6?W z$gfTSd3^I4n8nu&s~ngQkZlwaHW!eVX1fTdAG@4YxIkzLa@+w#85zR>KodnySEzJ_ zi8w8RJS4(>%t0bEOiaeI-`mQJz(9QT^SR>jW;e zPHx($R2X|RCoj_WTkQaZ3>{i{yoNAWk72?JCIA${sRX&c9IUY6% zO$SqlBh*u8m|rZ9hiYPfyKy>vB(tY7KLYH8`?4KT-VBS`9&`|vDN#XqgT*4J;H0k- zZX{a(fO+^y5j};U6xdrfWM^B{BwW;lqAVh^P8cy11yZue3zi&Q*T8rBLZ9-`gx2Db z7aWR^mxbT>zrt6Up?=#SuOeF1(l;Tmu<(}fDM_YjiL8>Wj{g_D)!^YRyn(@xVK*`w z#C5n3m+TvVah$j4rn}dT`d?(n68r0oV(%75VX=QA4ppQ_um2WsXzw^2x_>hqs^tgK zLNjIptp4(|mVyHFDtxC9lrT16wun;j&Jsek1&;;Jil9N#qa_}sTGBrTsb(H<%(2;| zvtSNeCB%hAxDKg8s1gfBnVl?zQ%^=L#ZDnv3<0^kQe}Q(L~>KO}> zFsQ?6FhzV4=t1nOZ4Qw}-cxcJyiHBrPM-zcw_4!g0Fzb$lNEau!1}R6;2jICihmSX zl^RUrbk6&52vTvNN_r%kUF^$fcjqtmDU!ogaDre!-Ffy*9T_`QdgX&X7CPM7gZw)i zgw8$R{4jxlnvRUEKwMPsxaqjq!xOvtMFg-Vp&o)A2v4s479edeAWb79Q({(VoT}S1 z(9Hw;K7#9(DTjSp1p9adRij9s+NG8o8nj~#F-{%j3ADG} zvIy=ZjbkYs?v!^i3eRgKZ8)44>H0jl*(kglv!HPlH+!S_gi&;76n!c*kk5ea_<=0j zL1L@LfoMs%5#gz>c8u54n zVqAHp-60l&#Gx_^#bal%)6G~2&1g9eCBiG1ki{r9+OuJ@KzoHkUyT&Yu`Qvzb#P{k z;C>8O18ZUPFwyOxRE3=k_FrID4gC=Ku^uq{mQW?CRA~R8`D3V-Uy)V%K^34e)eY@J zygW!ANL&3E8WMXnZxHtz?(%U00;GnD{9=afj#iYaNCmBw@?*2s>2Zc4DyRpzOhD7l zWn7@(*Jg!|3z{b|Oip)~QNvP%sgQZ9lrkHwcG@h_hZ57J znH9m%QE^#<)z5oG_y{>2;Jp$PiRlo70vGRr0KKPX9YE;6H$`Ltw{t?!>?#gv6|dn?8r`Y0mQkVHB!E0)0$ zy0Ggefs~{XgORgp*&t$7lP*z3NXBIs3ndUMmr#g-dY)NJuo2J1f0eTx6aVsNCjP_& z@#7XMHmDrSJO>BT2+y$8UJ5t=&xDqMs4!K8``j18>p<0nwqCD$%8Pw+$INn;>u9Of z%s~0mRGA}-^s5KNetQZUz~suFfLnEuGl)TOzu^e$UO-dn<_9Zb?PxBc2ZHTsJ~%v( zR!J8AFHd7BNH&X?$?n2!Rc3SrNrHA~@5no5D)k5rxraVWH=knJ&SxFXO-goiz$y~R zHn4dI)DtC}EA)z>jcMAIwZ*F0(+U>41R7^~FTp91yWM;@t<>3*y~uD6!GQzWK6|wc zsz29E24l4WcX!na^v400G|~8>m>>9JFZ;znt`<^jskwhSinNq!E>vRRb++hRM?%3o za~Va1sQ32qC@%yNQ7?KdvqEs}q<3+T;br(B&#^#xndIF7B&?ntg_>s)giF-1C>1{ptxWpDSf14&1PG;{NGYjcfaCbl|q zo#|?h_1C5xa-&28G5gXL`q{}TsoeIsRq6(TDjyHRH~j(X>yfOmL7yrS5|@fXrFOOf z-&0{Gvdq!k5;BHwcOLCa^t`V;4S}sB?NTG42Iz_&$okXRMT6G`cSx&!WJvJhK4MpW zoxc(HO;GWNO0;$p|DtC5w=h~T`=y`j|bt~Px2E(0i))PVstnu?y zuz{%x3;`% zWf7LOwcjGPVjl({w6c*@Gqu6|4z*J@$v)}<$XaJNA2KgMEUq2?XXUOn5A=F&*DXo!;-t5B(rfobd7xwt+1D9(*k_aa{J4MV`MX9xN=h>%B@bBktb_bp zyB1T55L<$HkyA|AkOn7+0*4^7<`)Ou&ah~Ey*F1(BVL;MnmtE4KTSzmV5{dLUUkh_ zivp4IEDB2*K~VImXzdh%1ihXi9fmPy%5UT{RK;@i=EKWK`l@sH3wq8dN(nP!gwtuR z)9wzZ(=aDztU~5Wdt`o@rVP>clV1oPm9#HTm)BRAxUlv%`^JW<3xa)N`^`l3q^1+O zJiK{+gR_?XAlo_AVEkgtDY}XS1WyF0Gp+7VG{t1lfe_w6CwPw?D>=fc*Ds1MWEBXz z)EO<2+a5G_8T|xIT7%cYON99i5>p-t!1a{u870aL7ss9yK8PQA;5o0A=Vx;ghEw>W zYAO>XO)rTRz7i`O5jFR#P?rVxI95#ECnWMy1Ann;DR1yvc#={L&bY(8R~1g+dtR}_ zcfsIfngJ3H=&FNXcuLKED-N`nM|On&5O4QAi;gh(7@D>@+$bK^s(N{ zmz%y?s;{(Qe6y0`4hUwn4v!lY8@(x~=apnNREdUjrdRZwy!^~dhP`?tR>C@%R}3^{ zU{|l`+L_wCFVq7;`J#6^8S|Zv#9#vMv8APHG^aByWotGX&ca~tv)k=wALujf0|UE+ zaIXHQ{jB2Sop5vkB6a8-_VJz+*v5Beh$yAS!P<_n`O-kW1S1y_+1G3J@|n8Q!{%E} zBN(~R2~6=`d^kg+yqCD@f$O~PzMR2D=dZ)HDmR3av*+gX8CCkooT;|@n1@mpL2BZ# z3>yfu=w9o6|2E(I!&N|c^txzfa6W^Vo9Cb^fGzBs4KXqX7o!f%Z z%;3)Aj$v@5aQ!8;fHOeOBJn93)@5z(fE>X~Yn&Rhn4?G%GubJ^h2 zuw+7XhCzQ)e`gSMlkJ|w|7G~+gZq`(1J3hezWqZz-{tGl>u%N%cTnwJMoQSAv%C3*=J0qo zUrwol+MY+Qv@fRs0hZaio*ST$`2Ma35=3gAiBRnbOD!Gs2v<^9Z~DKeFoGV+S4ME%FWifh&}N82E|VRP?HLgOgo>?nE4q$SF&99tp z{$;u3eEu(MiktAx6svN*j}fI}!q^5KQ!m)lb<8?2LH^emfi98qz&RhQu0HL#lU{c^g?i_*hs54< z7uXGSB*fJv zrju>Z^+VudC`lE8YB7G?lUGsCz9vplCl&-^>(R;Z?yTe6=)coWfhZ9T!F3bqNjD3e zF4sFfMhdSWNyS-2yn0QO7rA4lsEa+zRD`=s> 
z62QHPQNpYx$R0to6Uag$15nABzf6OUGgon@Sd(HvHxlg*MiIe38J+kf#d9@RfaCki zJn~>Z8~_W&hlh#<`_@2Cczb>0xw_vz&EW1Kb`I1aT2j{P`EzQ`^H631elaaj&fA2_ zk%91T%w{tSI2ifRIR$2wD!Cs42dgnKwE>5L_Mt5>gbK4FE{$2QS4PW!@O4x1XdcL}vyp;r+}{Kl3+Hni8H>WWc~FYg2TE?r)|}`|9G)c~E~^KqSplzE3e<X0j1I z@@tmw)$XI>hl=+cvR~HB!=5ifl#^JPRFsoc{gYH?lvJgZROH0Il$_-6OE#k7D9L*W zk>P{lAjiSPH-2GbIpv76J$OORA%16 zuQmf{u?R#qgpZ=cbfYY2j)BvH7K|3E@DPg3u#;^F!X^KYz4ri*vP%27XXfNgCX*1- z6KX=0ngDhQh+qxawrD_Eb=83FwFO10CSYF;MO_=9;;v%AzA9i_%Tg5U2C(lMu&u63 zv9J66{^xmSG6@q5>ib^%UEk+kOJ<&V%6ZPY&t3obeKSj%*rWK{CTCnIdjO}tvuB4- zx82$E#d}8W_4;R@&VTPR)pKcikoSEr_P%eEPjYaygDrfL;Vm%N6U5yUkSdV-gPdi> z+q?rzYmnVfopMUer7B@c9OBXt{OgYN9aizA;NZ(aw;(_vlzJKDG zPcN7ndUE;X$;-!|`N6o#LdM_nVA4fQ>LN@^7~kUO41Q3tq{GtRUHI5D|9wgep_5MBfsfOPMS+13PfuOYI7M1+Yq<26tL z>eV4z>>xF!LQe`uCK2;T>x0&I$HZ7k5i>L2s3O$9=HqFI3~3)M!rLKKhKwl4s6n-{ zOHm6KAv5lnj^!UAIfZI4=HOD`uwZgI#|%Y+A0%7C4AZS`u-5j!*uUY2Zq)CzdB}vI zykP<|En>OJ@Im zITxB@s3r6XV-l(HX+))ILupSA8%DjnP;7u&8H9En>yQ5a=h_ zBrW7MciU8R4L_BX;Z`8m%@riCBt|-cDWdcvQ-p>Qk=Y$F>1grvG2qFpi&B&lS`B$C z%ClILKf%xuZksV#ghbc`BES@bh5JErfWS>esarQi#bs zo*+6*zNp)@sE!a{a|=4k+@ls{J|PytB%-c8;aUCzsjQH&K#FNX`OK!T3%c<0T2x&( zH?A|lf2Mh=Uuv91MV+>QI=rw~g6@aDckMfxfTvVQfyenAgzR$x2sQL_T2YF}nk z%@Rvhs~bvrP8ICm05D{{`g@s?@wR9yzo@!ijk%X`W`bUe()m{UG0m|lv+A^pJ!GO< zb9)FtB1;#gY+$>Kxs8mY-PKDo?R@v|t^7-MMS@{Whx%UxsOZPTJ2K3K(O~a{kEJ@> z^io1W3a`gJLM_sY2^oEPX+^V5v=I4W64(&s?bW<7uR}Aia)@ z|FnXip5v#y*iYkZJ36SDIU3Ld;1CGAy~IqY2@{GLmA{PZ8sMn?8-Cwmb9=PY)Xs}Z zgF-HwQ9i+n0h;g@ys8b6F`?eDVzf-vxOdv)8Y3efPpj&7Q^#)%_3=mrE?aE}tRycm zZ0?u=7iCWWt}q~}J@hHi9kW;C_$26&YW_uji!0+*`BsglQ@q*Oab2hajzwoz;U z>KPR%z`{TY!^H{P2!k`Odgf;|bdCec0im?>_hZ1y6Ye+{&e)zyt{~NjA$+IS?6V|t zaoTQ2#c#SSj}#2u%;GpeHtTX#vu}@tF?3gTyPa}-P~m@wx9|p-kZUw-bJq;&D|g`e zrBbe;&o_ej(Y^y=#2{xQ&K2(emUNkc67x{hft;)Al_X8Net7kGlOuF$q=_)JLY^?WZ#}MupnheR=6fqCrcX2{oo-&SKo+7Ch zKh8mr7t%=Enf=6MFm#$k$x2qoc~zFW)tTdIbBAF1+TQz0t-T<$SBOj*%DaP7-q4=3d z3~dPGy$2iOy$88Xf3wY3{9K)afl*K0p3+uxA?4uqoVxEZv%XvT=z+hy^w1mU?_U4q z+aGb8V9SF^)$OF3HCpg})xH*dF@^=Y+>kl!0oddKcr!)?UM%4amMe;MC0Qaa>B0%j z>!^w-XsFAdILUbru7x5y;dV!POCx)#Xbi|0<%WC!<^`4tyT3s}-$J!Fj01=VZh|lL@V=E>y7Hxw>XSPt3_y+Ctc8xt{|$+g>tLdJIIlWzJ3X=E zXgn8nwtp$(-oQ4Xotii~PPI3vX2)q%9>x9A)10RyjNBUA%jsYnVcw`egO%~t9}n2W8t+=} ziUzoV1Xd(C6G+cOT+j1Sp@}LJ4Mba!*Ra$bLfQRPSY37MiA^ssXE^o=PUkNcFR3;s zaSb{Ym_tP&bHWcGk97|nox;B2pt~n0gAi~E3yjI#M%kcdivy>-E8xRR{b#YwUsIy< zHKfvQVHC_)8BIA&k;`lU3(8Tkc??O(C;nq6S1Q0Lrc$APNfr#ZCxZeccA+_yaPLg2j(2x?KOFC)dFMb5$xeV$Sir^ul@!n@kuXo0vYB>hzJ$`&N|41=6w`5k*oI#Uu zn~Ok2BWNhUY%l|>!^0LLfR9p+wHZ=1j*W1J_Akb^#~t8c6n3G<7n`sUJ=nn16&u1t zwBi1?4<-6#iggN)W17HpB1H6Ac2pnWO#CRzD1MFFJ*0CERTvb$7)WIdCekfeb%l#? zvU4DBto;mXGh`mhHzFNWx`r~{?xQt@inuVmE;JaBcoMoF51*K=a(9GhRen%4AKVqk zh=;i=9PRfrebCpWeTk`=U&|LzVNN0_A%h?3sE(Q9cC3x$kbF5>TJx(d!vIY7#b?d; zOqC)~ZGJi$xUk_nZ zR}MW!RGJROvSE^dkrk|U8%gx%>DNS+Wfo_dcc*(qc^9b~O+@KYZ>!gXJp_#&N~m}X!V5e)0t$Z(>#;ag zKiiCEIclv{N9a8@s$%mla>9eAGFa@l#aJz(@^^xbUSpXgt*dK)x|->xD?!a<^4SG! zLX5V!`bbf71{wo<-WTx{#{+Q0k$qC@j8IpN!Wk2Jjdt05?iEe|Gj zuqM?af1=klDFQU9W|}m^19f%LY+qe1gSdcE;Y65ZvrT~(S=nRej1rrtBYE*7XHiwc zyRr_ zONmwP=UOZBVD4f}C#gfn_K|L&_Mx|JDsG!*q!sIYntno!!7B;OA*zywJ}v zFky(A4Xh^v+YTS-ZaYmvsAykf(4LO63JdC8#pXE-mLoBTkF$^=zK=WT2|JuR4XGRC zwya@X=0gDfy<}L7Tmq+}UCG>^ z#94*LA&Dv|Wsb#!!Xdr$KG&EGR=VR|Q(axJjtkw5PQV|Ma3iT272)G;a1KMUW{f~8 zUCfJcU_zNXBpq0o{ZxP`WPgJL7fZaLO|K%Jyz^HHnvOyDU^NX(crr^G>4ilJ4DRTv z7L(NK>jX`ScQeYDC5-sDjJOEB2~Q8VolW%yx;$^h*g+c6Z3x|=vF7qDtb*=V*nH*iRnW`WRTaBhI_Ne$bAv%cD+2`nf0wP?G;8S! 
z-NJl(Q4NU?8F&dOqN;}Q^tWVXjdQPeS^0{&0_UDX%qE;9PIXO0=FDjw znKK%R@k~ItoJ5&%rCJ_hud$JlH&j~ovo_{|8TAl443J-*n&y6C8$IiLG+&u6AWT{m zpib=^b*xcQvR>#o16VzjluYm``QFDMdnpjfC-wAx&Z|&&SL*g*e8{4#;Y^2PMfP8-f}&Jo~7DLr@rRw2I8xEkz0SKF$is!&X@SzB2xmJ|d8 zzJd?2!yRW&HMP9vAc=W!b8sM+>%MtL`7FIf)(ZD*O3w5}X*1{XzUy~=l;Y)G5B096 z8zP?D0(C?ZdUmB7Hqx0ffsGM+MpMYB3bk(~uKzPA2yUPJ5L}_Tg5VGa_g(9Y$92%0 z-G#iA63`N|eZRu^=nlhcj-VKT!0?Bp-Pk&UqWO?Ah!86#-u4QI*AYGrtoh3?zI(gGtqsq^d+x?MA9Zoj+GV2zo`wxK@V|#2|2iYPQ`N+nI*c z7uvvLI5-V7F?~ndeuvq7(S@L0f_V&~1(jEE zMe4_q+#aKXU#P7pORC(C2IyH08jb8dx1pXzxWigd8^-ZsWE#udgd5yQl#hFgpxBf+ zvngiOne)1YkMqS0R+<^VJhDci;K&;NkKRkGB8NxD5Uhdojq$suj`u5Gw%zF&EP@^` zn)Ax5*>!hS{C((z>o)eU>2nGPgDnrXogNhWpYJbyd%?=619(>oR#?6a1ZNx-n(L-D zgiV!eTiJ<1js!I%>%Kr>0OCxvkB-Z|LIc){ z6H?9BN{|2s8b?S{4h)Wy7XGF8kW_P8UE=QrYY_OfDxa!Hk)~NvPn<*nCDRS()K)jb z&v!OLEOogUaO?FD=+7VKKU*2*`GQxbnyo*2lp>xRl#LKp+=|4<>!#|(i_Kf6u9*aV zNLi={DiI0)(@hpZ>eW{=n@MSa>)s0FU1dYEkP!v!pLFS|_*Y4<;2Jt`g87Ba{?6HG zuOFl2*hv%co83o{ReBNjy}Y3=VVPlEIu)1Puas08;-&TEuT zT0VK&JcL)-B5bOMpY1}UEO(TL2`;ViplzVoV2#R`cBCs!IE04Re+i!DXB1tSmlf70^Vz~Nkt(R;Ts6d=|*GV9SGT zXRt{DS4n$^z9Gqe)>GqqJa*(mllLlm>$Mk0cV5_9UCYm7v)u_xF4}b%)J9!N1X=n8 z5HPtI>ZLlGq%IMMDbSuY<}~bHvl}kQTwxxFEm7J_7XpV*`LVW;qdjG8#GG9&n5>ZQ zwb(*xAl;qj*EQZ_Uck5a<4msB9+PUmr}rQmYaF(@>0OCX7|XLVh-=IK>~Ain%%;9S z!rVl|%3(%CtU+-1zn7h@^o61PUToebS;;qKs9Fn~b>%s!J}zW#Z?h79mxsJjPeAv3 zFX;gKnJh0A3nQxa3aBOYC7k==qhV1pS!!Xo7(~v$1~)#TWrl{Di^Mmm7y)wG zl*m5C#c%})=ONz3kRCt*LuDek86f?F$Mygqgjj&xIU*GvT181hpve1Y>5 zp2@_F?vIH$MDM6v1MkF}okt>#4xkOFY+4fH3D>hEF5e>|9R;J;WonpzOvOhkn;o)} zo0y^~lvXw7xJJ8|DL_dAbQ)F=Bh}1ijTH!3GffqLg(=F%-7Yy>Vcuridg4yiyveVg zPDHpA6RxRlw3dhFvnsM>3kHdH%tG|gGV?das*)AjKShs>^KaD>Hyq#flbLg*#_rviwmTR z5K@|DNH*Z*|IM>WKk;KfU%955hK8d~K{kzqU3Av1(`(2-5A`kdE+%t=!v*GAa3oMF zVL>q-H@7Uk%P?weqHyik?G7PY=B{uGkoF-{@J2`ho;26!yW=E|bvIv9iHJ2s%a3cL zN-3hKqnA?D-%lyR$`BsC8`qP_Fdf&0#$etJ5vcW(hrH(6&1B=Q8jqb?gHt$a-8DbA zlhP%N|H}mH&#yaV-T5P5v9-zmky=ra*;N~TfCm2PqQg$R{^;Yo4|-+jS+CX2n6dY6 zNb;5klPUs9LkLNg3k^nHf)Q57Xipf^-|G~Ao~yj8=CV&fs?Za+r1=iU$acP=p}X#m_(zHM%aP&lB{ z>|aMKQB@Zr(po}sALYn^_NS<>N)5?%e+7J&_q+j)hp_sH7QgOnT&|pqP^Vi6=ann3 z01KuRKn>}bZqn~@^2Feaud>}j{g7@K(@UVMxmXqoDXCnu<^!kiCAQr-TBf_pZ*=wZtqV6WLb_q*c+c#wI89@+jpC{L&pMXN`N9QXv7VRpki~9NDb1qPVL? z2E)zw3>iz&fy@T`b>7=vK<#>w>#cu(>YXFgp1R`nkAtH(e>M5zTU7U?<-w$>3IjWG zW{(+H9lGJV-Y?EO{oEVOzs|m4lP<5j_sg?4p7PAdi4Q+=^0{Yp>yUKAw0*xV0ZbO` zSuo_dm832;xloq)JJS_VgVgT|ts-zUUF%CMyHMKOAoCP0 z%cYw?jW56bCX@#iFr(5keGABmfDmpA>4bp!+jOP)g;|MEpAP)uYKQGZG>Y8tL8vy$ zK8vc!i}tT-htWUcOo*o%y=Jh2Sl;hV)z(l_lT};eF0R@dPcpyK{!+m(wf|16wx+$? 
zADg|3AjLA^56S~#KT*gJYP@?I`K=N0gFjLy$e_wSh%gr5v6Q5_2yTa$rVBlw->{Y+ z3_+{6xx8bH^X`gC2}L(3Y>uzdW1&y&Vpj^Of|Ml`x%Bez;d-5fa&6RtnqUu=21A9H z!j0}%%C(8N=q1nszXBD)=q_iXd8$_fN$_eBKbxJ07^eFH(-=Wvx@#J#$q`*v@4_qT zW;t0cnWh@xoFR)j)11od#NNi$|K>JCF1CZr<0&V29xS0{T}^#0AqXt(Q@4b@vKW+G z!l6HA31hDXZ+JH77Sb>OMieYoHt$C)q}(swLORCR)`gVEdxwP_AQq`5CINN<{A;jT zhq<)(@ytXHelDm}joabPo`;HBe24td;>yu7(H5?4!{TCn_!X{}!{xNFi?y&?1eR7| zHn+6WFXKxq?|#yym0Uu~tOHCas&z}b0_N(L(wswxDz=#Yf6QWXv8o^b7w$*sd0%8E z2{TB@{VLpgoe^KJL))@myRaFVSgIzmAGKV!P=N0zZN#f7CC6$X>f!A6^)!*EgE%_k zYxoQ$qqG}QP@0zTr|(B*EMATg0MY;beoV?(pYnAU6CR>XNu^$T<;3=+HsTd{F?=@L zrj_00o%o`^6X*Q+ohaICwiBVyHQ+hD8ZHQg?ZFb>uD^L7yuL(}2LwzIP0L$KrYa;% z1(Iq1Rwa|nYo7B06PT?Sme17wjTY=r;1P~=AE9VT0qL@%8KgBI2jM&BreLI^B^isr z<4niN6HI+`DualPA;xET>Ozl9cqSfl51>3fVp@FBH9b;CwCIWIh&SOeDTb-=g?ju@%vGA& z>qsw4@15m4$`8iwucagk)rIwalzGPK3RJK{pv2a;xvpU3_<9^7ZAbf@>QF{3Q2u%B z?XDu5p0~S_Le5cv4SqKRE$Ovs;$G%{oND$>i0|(xk9zKWYRmuuF>`tD-azw+t!%KfzFN}zDn|Df0E}282!+!HED5en>J~2amgzGnj><5Vnp<9n;16hl_59Q zHArfymFrcJ&FPiy>jv+uBk$`C)5=3>v1vSNxlbgumO`Xh#hpe&WKmpMRFg5| z$b3ECD@@Q0jTyprKA@!x(SkgW7+;*->b}>;NxC zTaGEv{90jl&Oh*>w)+-C_U$o}OiVnM8c zx_Zd&E^x6nKiM2>l$2ZZ3s(^-)yxBe9$DDLFX|rmXKrCANQSGyIhn937r0dk;Q7-^ z%zQnD+zGZ}ca6Xb7 zHk@Ay%w=wC#!R3e5;VSAs)yQIJ@0!W#yR1{b{FD} zJ9DTx@gWDMZ8PZQDr%@Bu>;-PT6I0++x>AGT>G1K{eScNU#=eg&8CgTUmrSSm11x$ z4<lhfekaN&DB+m&oO=06{Ov}kDI)V4bflKLDFd5fO( z7ml(uBXG&2-a;6g$}PB_wBsk?!Pno>HeISmswwws4=;3D$$?1SZDcuu?j^}N)GObn zlTMwAjXPdd$xTlj1XSS}!-|l*$WPH_HXW>7?Qc2qw(b`-fLZWH&np@j!%|H zD8r+jcygca3oY5u?nXC}YD~+mBC_-g8MvJNwRXQ#U1s!Tmp+>|$+ib6 zJd7vybO*Q&EKvq;1TKy%vfv3sF4de=>9*-+r1SilAx&=z6JTHuFsy=MD#6^@tPfUG zu)fBD8ek>8t1lo*mv62`VMreXy=Iu9H5aB(V`Az8W=?IA(*?X8ihEDlYjkFDuQ+Vj zF#mQHKjwBpv@&jY8#9Kbv1Deblu|dI=?_dYiTB1h?@di>-W!?zV#Ln$r=)q4sj++q zQ-ep67QxB#;YfG(d^oZ1@;m*qj!DTdcPdn6-Xe@cWlUB5>mX0)XyhwCOLrxS`M8$E zmoU|jNMiaK@D?|cE#Ukx$xL+(1ns3|Eo!8=Z>nDoq8F}jz9wcGH`1{OG%SsirSy`g;LqZsBqb(KBZU@6-;n($XM~&&rP8 zDefP{weC&IHbr;ivvMtPZAVbI%na=npA{;q|3req86PL8U7Z_j>8=U~GwfnhkC;VN zbBrFyH3*{ubGDkksq`xaA7Ed~Pl^hfTtM>`YPgPbnhL~-$J(_DC^4Txk6qdRnL-o( z3p)mM&>Y-&Foq}&&lCk~f4`$$((O7lgqoOTqadR({|c;HzjU?DUIFW~*@xMVij*k) z3^1IJq~={p2G<4`&wma4PJc2aU9#w1Wbrn!BLcGF3d-iYIQ5%~<>IR)CsbAXGD$#m zt}yM8U$li5TAI*tT5#xvW}Zq%3p8osRnEpUvKkm8TsE=*jZyTMYABfEIaR^a14+c= z+N`von}o}VRHI0{#;{ED4frPQt#2`p)FO5<+5pLy5qVv^M#2lgYg2 z9)GH9ooj2Dj%&BVpT>CXY0pINbVLIev8>wz==VD;>s|h`{>i-}MMlV@Zd%r-{dat- z`8zr>1@~=wM|}1#6PVEYej3mwS8H&^NZg}01xg$#Y+@P&J8k*U2Je}`Tj-_*Cd%=G zz0jm81p9^0P(~M(0x!TH$s67%XO6U`MzVSIq(w7Tz}|7-RWsBF!zZPEcfx`HFGxwH zVE5Eq$@@VK()EnCi+P>7G^6JryPsAzx^sh%_q0FyovU0UJ}_PZCq85{x>eS->txa- z+|N)kAZz~MYAX>SogS&CXo)yi2kZ0info2Jy39oo94WD}xYs@?1S z5a4w@uS9uDx)<$IE>+Cb)4{$dm-t(9*$5tQwwhuBUaU&4xe`&SDY*?^ow1rjWL2^? z(#>PIEq`c{j%;<2+MYeJ z1I32v&cvEGt@L?93$f6T{WmSN_Rq5cZ(VM|8{pQZi+NuxYqZiHfd{bur`tSvf|W`{ zXGUW85QvaicujCcwt+&N19GPnlFq(ePgK<6M7$E6XD~IIYwX^aLMKsE2M8^l3QKcX zthGUy>}7-7{%5vALf{Z{p7HQ`REep&TmAllWxAg3XiK z!9$Oy_JJYSGc7#ArtSgXSCNiM(kS`yD5>#4cQV3(Tf=_F3gt>prP3mG^bS>+Avqmh!%sbF55Wi2(>8L)ZO*P^t? 
zQl~-`_#@v)MYLo?7nYu(lb~*g(|fUM8B){|%~P{yKdYLBA{PY+`8so_ThZD zVhg9%(T#e&O{Zp8Ee`mXf{h^kQQ<%aUB(Ki0O{A$eLz;P(7w_Wv<(|uGZcLZD;NWt zuo;XYu81`^4(M$&Qu|+e35x&RP=nO&O5VwF?a=z23F%T|a?^`doiLau}}PbFNK=(YkgbZ&B+DX#Ab3#Z^mE%54M`_EQDL@EqbU zz|BY6_D&*bEiPC;QvBY7;_DxB8qD-CN641h>Mq}%V1@Q zuLFCNYi|Wo;>Z|vh7S8l%0+YfRI9NBn9W0)IddA#^ukD0m;uB>z$eA|4k^~_iH`R~ zU>sjeO$<(a%l}G=45vu^f?7k;7=3vn4Zn5Li|vHQ9rl3-o>T{FSh-|5%DpKG%_oPu?i|pt<){*hc zO=NtpBV(0nnuF-^eXa5oXPRG&sW`nL8Th8{dO`CgZ_+29!(3w8%?pl!7%D8rwf-k& zCaV4KfavtN!;4D!W>h-?M|qZctK5N$Te|sLv&madr>4W3&P0u08lR`}H{sioY5We~ zmLA8qNh|c!{TRLt5%)H(38{3SvM4@!V6vN{&rlm3-^ntsy9HB)Q`DHS!LE0U^(_7o zf3;e-AyZv)>% zR}Qy>-NyaIgAGT`Q`6PX&0qVyce11Ych_D@8fLE`VCceIN}M;77*B&6ep&|J}W zF+Ut_?|tYy+Wr~x6~Kr8J9d}n@sc{1O9eq}^DAgGN3l`#RwGrzfD2K|5yfQ)b$4X0 zyW*6Q$6}Bxc3B-`>d;eyc9Q_6@wqx#o;w)dvPxxLZKMrb%(@y*ZNnNipLX#^w~Kis1mYl%Sm1F>EM$e0u#iaLA3llP?7?YfQIn$y7?LinTyPrQF#%5F zb3X;{rA4d1w%|1TGZRAhdT^RV?&s9SJJnSEP4Ks@Gex3dhR7qJ0vzY|4}>Zc05(3~ zAuHEkUvBLIY-}qylEjCLkvV`(y$7w)o4)t|i8qaCP5uACOVGbJ8uaSHHv45Qzw}D{ z<*847I(UI!x+eY-znd~ldMEz!a_pLQEq{q$^Af*wO1vii%gg+-TkMw;{p`}3H9>Y~8> z523omyON^1QreE{;vTz>>PkV5C7`-AjwV!>Ug*Am`7d-^a#R-sjH9|No}3n_E;s(2 zqq;yjBu90n;53S%x^(LnsIF$WZh`8u)( zOPZm&G;=3z^=B>_s;dtek7|lKYz24cf$qp-_E2{%AzgXHZQ4-e{zv$Fh3X=TH5_dN zSeM&9f^}I`0l~WZQ-NEe-otb?Ir=?JSCiVWUE8Ggm&Ufv@)lc1GBx?uNfcyFH-i%- z!E{}`?f+*jKasWc^=mLKhUd4bFH!i_o}Bu2`N;mK&3f(lKfe6u$LbcH@@37wT;B3v z(%i75(6O;$S@P>tS;dD&rtI zspv+P^xr~$t+07RQb#Rd83EPb@`2djROc zz&6j(LW5QrS{G7yJFs2MFZzbzeV|P=L zT$SKekB0^H+{}^Ry_DkgTPc)TWB2fWAI1_^&?fr)usM<=WPbkuu^|zSgj3&`V+r+cxEFXYgm(&5lT-yFtbt3JBsNbL8V~*?2q!q#8KKV zE4|_2KaY*BB0jou+i$5iI`^|195I@N!9mz2Uj?f5&ZpYciLpmh{AaLw7FIN!4_WrY z4apRxxqc91tC%a(n_9_-?{o~bS)*K#`XXVDy%+#S5PJw#(F*}wivIJDMO`2XyJ-!Q zyx5;J9LDYCMMv2y}*$dud((wBQOujl?;&=ukM%_&u0Q7J7s=(CSoZFaw^(pPAzx1uu)}(JO z9l`Hl&ZzAY?+uKh;MFIWN$s^QIi`-@o!da=DOU77rN522hgun`r}+kWvMlmPO}Q&+ z0-{`X6YXflZ}QINC(5b|0d5HG+p!F+*nCIK&h=aM%Dlh|=(C~LM5i@z8BQFf^j@#I z*G9GRqMHLp9b{bzt$KanP%n})B-B|)_Jt}vb_#EGZeGf-2gAd;pn&k2koIzwJo715SIMX=|rN1ot&8)Hm1Wh6d)6?yhufGP-N|NZ70a)WMZ{q~nGi%w)W=sefhO zRp85z%)K%QIpz*m zm40&QF$$h#25F+nlmBMT&2K;pcY18bWT;5JxuC)!X=}x7gTq3hSc(Q*(fw!E@vc9I zxraUlE$muJNZ4b*^mF6LhJJzSQl#hyNLWBvVD&*>yk=-15L&bNg~?Cd zKYq#=$L;;x_#2KNb_cR7G-KL5?}bYqU3vbJPxg!))b5O=y$h4dvZP|`uoUh&ztf1L zAMU*WrIY$EIJ2`oq%x^|Op2}_uost4J?8l@XSB~uDLd!vFGuZ`_I}d(T1o3|FJs0+ zOO|~-=Z>o%slBu|FyZE#uRT0z766M>7h9xvpdEFT%|PK8$I=vPpCY=hpGm;Y6=b-M z{sAzTf2cUae9A%~LjI}Gb0~kIk{DYE#^G9)HrJgUt=xS$ND(E4fO@=KE3Tq`L==au zwFR?jVnM9{^(IrHuQYm+#9DWO0#M6kgz4A1{bi&OT0T~W=i3gE5w_cCJgdYs%4vF) zwas=LCiI*6l1L`LxT`1@5i~gp(jBfa0DJxjKJTUyb!Gzp;!dn3XOHNFI@PqJG;f+( z%15ZJ1lJ%VztjT@3`B>*ZdyQwQX^D5mNYK9#NCank2`c%HH>Ivuq{3w#zu?Y4e+*O zaU(_}vsJB1AlsMVNEojVs?O9n^a540*cTKi?5vcA2UMqGJ7k2JZFIX#=1X<2GF&^H zjw+(C+D4D1UIwScj!~*R7^cJ~Wiv?LP80a(d)K%k)y%?&>9|r6?E7AT1Arr_s3pS#}=qV4vt+fntOkYDSU=pR({0e_4BkXwrIzc%dM(cDZ z6$2AvZ@N|Tqq@g;-aYs@(GB7?RSxhk`7nAiqLF= z>oUzUx|Az&ulGy8F8*SQ0F$Q7Yd}soWJ~!-G_2LWx793KPb65Ii!T zDU%I~G8(y#(rHucX*EFer}m_FU}38zy5toV_q z^7O;FFRVekGJgV4LoHL%K4vjDR7K7<8(lG~uE6^_eU1%o=DxqIhrZW{7`Zz-$~q9} zF&$;sx~sX;94d$x_S6*CBpXz69p5tmwo6w5BtVLWzWsT)lA~dFK7(Mt3*~(8lJD)oo?5Q(a54y zF&vXk|Dve5sDc7=3+%8}c7&$hHH+UonK3P(wolrxNs+Q0euLEu7RJ z{y{GKmF;kR>>EQ&Uq!8YiZR;V)^cy!oK%|SlU-P36&LPChl_AUc>OVCPz@*~`N^AH zBX)D&WNkq63Z#wl7km>oeGy1f__e(?GEuL856T}AmsG;vozWR>msvbmmNDPrSvx2g92H{%*B1#KuPS$>xq~X^x{Ypy+*K=y#WKKc=ZB9k3F!q}!3k1PH zUr$xBrdxTjZu-MdODAuE*+xQixR8ily%n0ca0`e%GmluTMz{pf%MLHY&DW$L4?nq( zDTnX5M28IX0%`m4iQt`khs|>M)Zn?+)Y7ByLe+?1dLB0{s%ckaUGv>?*L+t2Oitgx 
z=~4RmDhZG0`Y*l-lV$P(^J;1Zf7PnTEJlLu?oGoIl4#@epjpz%OhCV|`2(TxF5X$A zwIq3wMq9vO5ZTP%c}Px61uses9K$h05{@6Wl`7H9e0d=fn)kdmLLTu#{&zbmpbkRw z-i4Y@ewrQO+6?S*1Ui$tDREBkiKwt-Fj^SP)EJTHDaRmB(I;H7uX6Lh_O=l{vB+T9CHRaN8)6=88Y(^7-vUcVhcH-+^`GZS&57|KfWsUlXq z*zx@9r-^d~%`7W2N+tECu|*NW9rMs32iN(53W%w{g#eZUmWVtNS?y(mhh?+)|LwBv z^utB$-KaNE3MAb2fZ0zPQk>mLGF-tz6~m}{!~Ok{NG7(moPKetXB1`cicyqapT~6m z=II*;@0D5qcBi*zJlN-rqq^E*7)31)CN*G^nmS1>rKG0L;q0)`$)AkRUHzNi4>jEa zca<%j^iBJL9DMSTPmli9y~}HR>|OZbZ8u((wL8VuLPO_%n|Ib;&%E4t$o!SRIe$`q zQlluTNhZ1}^tUr$UB7K@7EYMJb_@twXlJ#dz5yJ01Qea?iW-Q6ISHu}an$>CX?p?l z^)&Ud!4!fct(d6-e=1uN#do}q+^|kx?X<eqT+2-oqtkSc(UQYOuZHn5TRXF|S0izu-Nae5z3!#0j~gFV9M+^mpuY@%~* z*Lx8Tu81!pK9`fR2r?*@J{I<E)x zg>xLoWA4IW3t~7L`Kz+=Z_2TfA-H>l{-knfN#*luJOrT#s)MP) zT!r1U-0of_E>Pl-`d?C#p~zf;;W8k6tZhFWuNnE8bebx)Lrd*G7#183EcQrug=@p2 z)&jb*k3;*5n&f;oUHYS$AA$`3LH;A!<_%Z|=*?m5PfKrN6wypjxOGns3Nd z@wc5Wcu_I;`K0`>M*x;E1%x;&@C$ICG&43*H=K1M_1|4|m?+${>sw6biRZElXT~yW4u5fka4sKAusABGr;~>xiCToLQ zlTMNoabeheOQCBNn#-wyk}v;ZF9@y+_ff<_MGdk=fQSQHAEsgDVXXpkNHaG$5A6vN zw#0js!@(^3J$JyRE+WJ?3s^~|fVYLJBjarFA)ed|D~_h;o_{%98s1CGtaQ=b@EHkz zwTfj6SKHj#NP3hb?9eK^N3|UYkE;w1+D1|9Q-t8IA#9;MAd!hV2SBYR1hUJ$p!r94 zKSX~YUdNmkDVa2<>68fzS9o9CVr8lGZgdfCqr1DE^gIQNWDu^9AsxBYcDTd_Cs4$B zT8(MsUBxyyofij|4BdQ8?!IjO75-(C+zk2udn;#DUrSPafU3KCAJggvA%Y#X!VN$`DRG20$)sa-jC5@sc^#&&O z@!dv?lu*{^(eIbf(yMJQeCiP)=T?*Ng$904kj+dBW^SW& z^&n)9@nk*?QFpF@{n~8cf-xry9Lspefd@~=;F?-OKX=g?+%LMCkZAXVJk53o9oLnw zj_J6yDXTa)Rn>T#*V*1t#l?#CR9yToi7KvRXkSj3#e=6oCC{J1=Q08`uH=ZsSp|>4 zD16mr2}eyfdOkR5S#Vo;@uxH`)Kt~pI~tpRhPo5}S&q9W#HR5YtYMl0czZDZxKT8l zCprc=Wx4x)guWl5kLDan5%&aH;1n)5OQ*9@y=kp94A|&ESp|#NBk8U z&`^2aVD?%bcsjZ#(&&f_;8*v-+&wo^125uw)nFWMbfwoC$#T*x@`|^oh?C`+r>DcV zZ*4j%yF7jrO2RRTf`A#44gu!$2H9D=2wY!a#e83dIi%MoV4~UqH!lX#o)lhgf&XvBbJ<$yT zDUG<%a?lT;fej=PMNJ_Nx{)>&cODHCL>z>GVS(n20I6$&n5~_GhlJY%XjKp2BT$lx z2GLPkNCl}%@_DqKf?7{d@F<9K7dxpzqXnjJk4G^wfDLxwQ8=oQJ1RGjZ{9@h z_kN?<{B3Ls<~tU{uWO3o(^B>hmNGGh?}Uh)<0-y#Tp*qQOm%D|!)$X20fkTsF*4Nh zIf`8L}SK9Du~VHxc$I3Qpm}go(kk#r}Rw*8KwM;Gp~Fh zsqm7MeQ9baoHL(uia_^pGz+-SDhzqZ1`jLh`Wq<%FHoS!HMvA*cl|JPouHx1iSAl% zo_8hw)K}*wW~7dx?ZLS2r3eog6LkZ~jz&iFknZN(Dunc8B)>TTuRJw`w%DA*W1>WY z)x{bfOlhPA_i`*tk7OirWi z_j8~--(^SNPp34-AZH>y(7jP`=Q5M0vE@)9o43+d`mO(V6KvP}9aGpcRnwnl?ntQ) z^}@SAWQ+1IDe^V1MTH9jX}t^qrCx*qvuvsh@V=fBI#^6@0eMS6^N*Iy3B! 
z?6c_EYo}Dt8@hPw*2zgt;-tp2oVok@mTDGKTV7nRLt-%M`n4EZZUGllY9dEYssF-l zw$92I?m}c)rbw2#B31RudYjEe#7girp$I7G+$C~JlAmQ@=x1)!hu$G8077k~ zq1iBMsd_*3;)j0T$KKvK2!)JNjA7||h_AJQ1)M7MH@uj|0lQ2j>h(LgMXwZ-!iztV=jDH*7{WC}6pNC3xRl z1&uwPbXKEoWIj&80SJ!gBf^DHC_W2}n$*xxEI3+$=SZVpjj(r|mWE_3KMo7AddkW1 zmoqxiEH3{7ZtPkWQDqOi4v;Wp_<8^frJm1wq5N}O`CY)A?n#W*VUykq$ZNj{tzk$Q zbssedDC#AurmpxbfRV~c3(Xf)%p(zTMhU}3hS65lA5A$qAm&0<2gs<#NmxOJ2UA^T z%lX8!@pxLG5v8E0cIcl3}g8oN(tjO7-MM%9rN z(f866oI%;-nEH)M1FqL77tf)bvp~6ke-94}&4T|IlGPk%efP*lJ0R+uFS5H*+ zyrg&r%-hp(khfz6X|;-LL7d=2nGs*bvO|OqB+(rojQ60|Yy8ogFL)%T{~HPW~M3{g(94VacxZ?>XB0 z&7~1+^%&RJI>TbV{Xaya__#p^jP73l(kW+Wm zei%3LK&bK8b+B}UV}4)!KWL3KE0%u{NK|)X3W5)+MEM}uT*hR>Njg(5(}Zino46@D zAUNn%p)QCE*yM;&KAZfbOMpf`t%nGKwIOcmfrGgg0NvqA9^A>@b|~nrD+GTmVd1-q zdO_Or`y_(ax#ssXF_mO3d=kLjt3U>KV`tWz7yP?1$b`Ec28qqn-DER#Y{@xOoe#{7;O27P_gVM+60d6jSAHEW=5{@eY<7={V2j}PITL;N9hP<@BJqo&cd*+?@t&!`+N)eeqdQi-$% zsph!oI7WY2hx!j* zCB(dwEeeDTQUes?uYB~N{;&HD?Z0Bt+mu`v-7TF{@94kJs~>+De)6LehDUBXw|L6y z(w18uOsXQy5pLt?_g-9|b=K6i3p>=kH?hyM&y!Np86})qddr2|uiJF<2^)6*``}r> zK7DS|08-LWQ&P1&Y4{2%8x)(aZ$KM)8yPWdAl zhRULRRVOrk>xuh>K&Q)^y+G=4wPci=@KM0W@qIVJ@~XY24(KCSP1o8R(+4k;c_Ab} zGD?AB83>hQ?(dX)Sv08AaPB3ERZvlNq(LV>n+CaTE@ zorB4Jc@SXoS5#BxN*vI-eJ3+iAVduU4elWt+S)w!hgx6c7}TxLEHs>^iv1&raD+UdYH(OR8~Ow=aQlg7H`IeE)p?MCW?Ft^^bwHSYUlg`B8vIF znJOZNFw?fk}^K*4Ed64c^j`Rp}aFB~r%}{T+w9VukZ;AH;iNkO0%@C*kn_r{t zUJXKWH-Wq;tc^ToKqkw(&smWz31hm(jj0=`*e%!v5OuYAi>2^rL2wj*g|&o^7NF#v z(}Hl9&1pdyemkKB&CcFrG~zgm4Cjg2Q4m8KLaHnNt(o;G@2_lV@Wh-xG;B^qS6;~! z^oWw6xTV>CuELz)B6TLnFdf`JX1>vkA9h{6oo21(IRwQT$fw{m7H55c7)&b0n|YM8 z(jdW;*f3^_HxN8_y5(lhr4u6K;Eg=qu#z3P(@r%$bLfGw0&~z?9n|2+r8dj`nZ!Mf z86M=BmSrPV0D@<^h@Tqg)9Q^SDu2n|ts(8GGUufq`Kd?kf?eS22fp;n;^BYn`E+K^ zo0<8CFMj!6@!*yRlXB~%>^3Q5F6qHNFFoRg=ijgSaA;~|@Q9DI-}x%3tVqi2ld|w7 z(mD=$XWkjpN1QroR_&}YbKCb{Jd`Y>;G-RG95DX29xD&OXmjzM`6Kg__V7sBRs^wd zj=+EX2b)J2tjE@blCd>0Zcp}2vUlmR>9wc_LG&W|i)ppCCqN%qKSt`LdfX>f@{^MI z0+O~LyJa8M7m9{wLXsu&$M#U`qk^OPB>Xx@Gt5&sRYwB-!YA@1;HP|%a(h~<^JCDQ zF8lWMMr1@k3?*F#q)e5K3$EaIywdKoIa3i=(q%Ec$5A0mO^EOs>;9?cV_2Zt1}#In z1?}$2sU^7v4sZvA$qP29+2ImbTLD+|FkRG}?z|eTIZn+#-7tyt61f}6Hbsbi$T$ZM zb7vf@8Ht}!6FcLh=_1jnIaV^D#aMN=H&(pVMBu3d&pkk5NHP@QNzi_ekCXSu&ZC@o zC^W#IfTvwC{MgLlNOxsdo}oZn1ay=bXN4FO8I*sf2b*ecU*v{yo<0vJw5wDumm#GG z!i$0vm50@v1rLT-vkWsU&C}GOWN<4{;eb&@>w88PTQ}#F$MlPX0H-qu>$b{s%4M zL+&;O%jZUN*~by^dLR+;XM1`90YK>mj2C1-(E;iSGexe|a?q3s_V|#h>f`BW7Pr|l zlFsO}fv_N|LEMIdt7#&VzEpRm`!ms^uo+N3*5>X<2bEzq8wHibjU04@c0_D*^x}*P zPZDHOq%>F8DeAXUOS&T6@1n$APgMe~L{8R$vKj`!57E-iC6Gl*@f$cyzY>$m%{h@q z3K${N-ky$dxHCj4s4%p3FR%rV{ic8qJq24Doxu5*24E`V#-TMb=Sc!$z`!5J@+;{` zSaX^@4Ypbcd3sNtF|SSBDlvwt}%E7J#~BHaZYb7N)-Mjnm^mxkq_T&n{vJdd&Nx+QBX-~l<#MvFQ52*qaDO}`|}Y|0qzy?m!7)iLsU zCA;v-6CCYd55_1q`h#a|pkw-*%~al?*4=;(nS22yru)g=$GC^VQ-jMkrf} zn4+hf>roMvQsju>OJdqVJJNB3RCAbOkWu3fiZl$k5}{{SPRDD4@9Aar!OAs9S3nZ! 
zbh-e*LB#;V%IDiOG@7bX)3MT2;g#x4io{$Q5!@VK%>%f<+DnL6nfr*xbmm5s&DM9@ zxs*dhesJHykjmK$q-9(;6GuQhoV@9{GGJ^wSaWtaqH(x zcR@g2MSYHl;Ob^J#tAkb)D-@8=pmA)!9~qU0C{~P&o^D6^c5DwUw%|!-%Q-4NaG72PK+EOBcP-lWv-tco*mH$)kY)a~}Q~4ad1~wH1^Hmuk*uUfj<1bP$CHgJ#d5 zW*e$-_>0Tpw+NWeV|?;O=(sucXPGx}OX{Ag!> zD}8y4R>zYFcFU2BADS-gt5Twj8<#nI;%-J0lsoWAXao)k8(>0@t#kabq6L36N4r*h zgH)}XSfhvzd3LRHx93x89Pu^_w%bSYG*dlKAeS^Cd@!YWKo*UUh@fPWBhmKvN+;S+2y{ie6XD&?Hpc}W)#xQnQD z&XN3(WgeQw+b=}T_ryX3E6tY*E#Eq+R7f8xH9=3f9ZM?Z#n_r%lS5A4(An`kGYi-inw~OB7s6{b!7@cfo8wcs@3~hu0fsG*z!YY z54jDi`7N}M`(#G~yc@h^^wXk*?kLZQB@*??&dX5re92v&v@N+$#Cx7fe z*31^E5}YJ_5w*R{Yy$bDX{MxWbT`bb7|Xc2n{SCF4AB+}$CI>F?iVg}y-SkLp{@rk zx`{kog?@4hafI$UfuzE_p{vvpYrTqULM+HO}BcZmzCO(R`=6^a9EwE7NN=)Bao_*8KCo$=T~X&*z-z^T>7V zD7y?!Sc-r&~oEg z$*yLhqG}349j`qMJ=~B&#zR3kPE%??*4vPmst#sW1^Ls$XQC66om0VWVScYcL1|{r zbeI3S1?7m~*Y7yiX@j-3+_XFGqg9e zf~{UGQ$stwc(so)f53UvU?+ms~J1ALbj(uT&5dk(^MGqA(dSs;r4Vqgw5C3vXa{b^K(ci5-*~{jn3BZN`?H@*!-IKpF1{xuE2H88jH;TG(!u+es%Exd;QJQ9 z#X7pijcre}bcQH@lz9vEfyTa+ZKv-xMujo)BG7;qvhh3gEO))J zVIgxvb#~Za88_~sgx2LE&V(Yj8ctJ@TTS7`XZTZiTaqHTD(jLNmatQ$+ZxEHftlN( z(ye=!Rx91QVigGLIFSpV{7w~l$uc)?ew)1G0yO1H>&uYWlY ziC4M>*!;?rRWKZi#cKFBxt;G&BiMnWQ_RIpDKKXBFL#egTIm+<8tY%qCCw_`YWfD9 zqqgoD;Y`=CC%BV`^jWmI`IwUn{)C`5s#44SpPM{s53$QAWoXz98Du zoGjZ5d;}PhyUPXBbnsDHFw1|_#j`%2hoSFfP8Fy#@i)3n+hW~~a+JyMrD*Lz zx>eBRi#9WGKKr+7_LB`v#FsROGkSz}>lh1YxlRHEsC69yYPI$|-n&ar7udq{0_$m0 zfEv$4EI{4ORLd8On=foGHu75q%yxm)Go!V_LUfqJ4oB2t%iG{=n|rEPnCDU4W8DED zfbc=~D5CZ~hCST%8RUCvLdYy=HcS2yE=hCUd(%DN2i|M<1_%1yC$#_%8oFksLd00O zd`+Y6r&?6%N}WriI4`)$xrTb3G!z#+2pD+Ol z)*f&|CNd?^RuI(&hxr0BJirb1p*DU7qy(AVcaBdcLUQgMtqNs&$6t;ee}&_vv|I0Q zI+L>@BKkM+(&kh|+Omox3}k|xw|q06YAxWyGvQ%h;SX}IvCq(OaA&u2kH!}FsPQnP z=Ej*W^z7fGfo{75GNWcx33Jsfq#Q!2I9TziRQdf0DQg6Tly!iRvc3^gj_{3;vc|8F zvbPJo@iWQ@>Y`kTl<;!#Q{@{*WLhsGY^=?(Hx3w59%Bbpdy|7TDG_s^N3(vk*&ps3 z!Bw0}w7H^Lw3(-aJkVVfZDueA&;fpCFVGzEK?fQm7y|Dkc%n)kml5J|po0jS3kBJl z1)6ubK%fIIfT(sLSnfTCSh-Xxpfzri?f`uuf2R7nX1btn(L#PyyTNfbeIdts)jP_4 z#)XD5@iWQ~t{aSj>dFLt%!D;)!nabtLHC*@r#6L?BC+pTmn_6;ev+Uu7vW8MkoUAK#Ptr(g@%pnUK$YXjB;uvdMjMsz&SZEJm zYfImI6z5pLtW$O&+>>_NGva^%H&b#EGWoL{(YFKy7%8H!z`&wV(#Z}G;2fP#1r?Vi z93?LU&i6{0GM!YxPQ?(rmx8QbpzNQQ2||Xs^ajUAJDLxH*b-0@E>iZo($Yw0W`rn0 zud|!FyYj4Prb`I^?JZ0-Is$AqbZ7x5mg22)kGmBmU7KmE+SyV*VwGFtl`}Pp44hWp zv^?R2R0i{)b9bY=GxSF!r&7rVsbOQ!VR`Hr;4%846D7MEwI}=wPuj$#UxPmQ;nLw;^8MA*+Z{xQacH-aRN4lXl*rwJ$)AsqD@4Xs29-gkgwi1ijID6s)N6k?Y` zj>TkvLyjr`eTBPKni(SE$u>uzX8*=HOT-8w09J%>LJO%4isezIRJ5-;%4wwHGrQlo zw&<~~O=O+|3rk=-ux=L~S3qeaz77Ht8p??+8B%?dD1?^*Jf>A!t$V2hVG6n*@}{^F zO^yAi6qVuZg~?7j3xVZPx1788P4SnOf)gIkdX%7@<5?&u3j1rOe5}tn2GfM~acWlE zus%?*hxO6Te5}t{FV3dwu@37ag~!ACDDa61T_&B zemhwz3CM({;(-|`V>MbN#Ij`f#z3hDZWguQ7kk#5j-#;S57TMLVLe51m=y8W<*?+) zF3Dl<0!EQm;aUD#m#k_3?d#Ifco?jEE7p%|jQQ*tg<})ze=6^Fv1f z)C#sdnABoPY8u_aJ%fKbRWzN+KnUIuoX#Ti zMc}2q%_x}WTzV9IMvsEIz;yh-NL=$dW`Ws{0p^e;aHLc8R2Rup_{tC%WRi%pnS&_- ziixEle3t1f4Lo+=YVIctkm;pp(h9mB6hu~;)4>J=aP}$TS#_6S_y#p?^9%q<1O>M6 zsgA#x7GXS>Qoy=~$#vjwVK~$?mm@GC?25#Hy>q;%fbP^+chUu9#O0%h6{1il{#(aA z|3W*CQr%8p0M5IF?0MGoWprt3h(NV6xk5|n=T^nvI{sGUEpQ80+z@vBeI$^Orlah+cr`upQJoz%Sj(#^^}C<`HU;{VbVBg+)a4jR(g|U(d7t#}G}7!FK=qq}iWaFT8BqwpCu42i zSSowqLzhmCBB!S471ZaoCD_|lOzrNheHBOc%|IJ-f^wjdUrHc%%DU(;QNDkBuw?xumH zajnedY&#}!%y^9@d8e(iFTElcL^#MlXF+0h<1F9zGjW6G%WO6T8LJ?5N#BP zP0jzti(ER*Q`GNpG-S*>)1+w^n;O&5Y)lFTl3ddz8rjGMV&EwVa{&x`MkR1W;O61c z^KHH|%5osW4X#)qg}2J#NhgC4v}t!ZC_!rYT$^^G`m2YZSOTZSi zT5l?zl=Df+t>k~AalfnvrVqe~D5H;>HAy-%qg!f&r-9S@1zsNPJf_+0oIlKrs2zMh zg~cE@x6N7lR<`*rFMS$y*1>1`8ys+I(*~at+u&GY)c#>anp*6?kMZLEG{VbLa{?6VL~ConFA@ 
zRfXEg?EPOseRym`6(ic7-&dXD>6mTLke@gLDf-nh6NaAq*V8_k^=fEX{q3uMeZoG- z#+C<@y5Ey}+LOxLGij_9{POuxdkuVeblnO=D}5oHCoYD! zs8j}LQ<4QzqOZ+(!YOUbpaG@2P(0i*$||p(Gi6Af3r$38M$1282l6>pbua@aof`Kz zuZ2U6IJb+i41+0o){Pnq#rjJwYXj`r{JLI5U#@J|4+E&f(a6WZ}cc%&OS8Ohl6z}!SFv^ zeQ$AJ39bI&xi59 zT(@?^h88#E-o)~ql5lfuGNdo3np5j+y9*OWMBx|UIk@%OBVU_F#9Y&+$)CmLq#me2 z#DV6#N(#GG;8(1Z&HOsjgGsHYJIe0Dd)AXEF{O&ivArq6UJi~O?Vy66-qcz5G7cto zRwz<$&ZS`)PC(Tmcn!UHDU;0#zlU$om|KZiq~g7eEC)dX%gAt*58MiyFg?8odUplA z(=L3xWoop7A5&I}J1ka!{H^Jw`oJ>}usx%uy|M?@kvw2l(b|I5UjjN>MbLrbwI`S` z@Oj&AigzR46SCCFID{slWbJ+j)5ExH!gn*`$vnh#K(TpOLo;b)4Uj5WhyN~06dsbI zZku1Ruf1^p1ZL7Bx8pYkIW39!8l!55p@zhiW~Ot3ku}J{0y+_Ig#e_K2&sKLwRlmH z-VKG$#R~_Vne-y_4AnMWVzwm)$VGw2U?y$K8%;flw4cDjDgXRnw*eU)pIf#6@{2BC z{o2h#xV+`Tr0ihoZk$sxL&{!J#9xQ5_e^i|KB+?bzJL@^^YBG-hO%0F;>n_XH2%Kbl|9i*Z^G$b@VcpC)~F~ySUG|3A+&iiz;?YG)?D4DEee@00| zj%Y1HgWgRl)B4PGRpered`_NEE))uXwCV8DXChv*SLV^ z!5E*K=5C0g@vH&is)Oe7={PwGNIO^cz#3`87o=eCfgtGS-jAotKoLZKfuqwtK#Qj+ zR@T`(AFQBAXpIXf?GIUkLn8jGB>g~mJCb|>k_^zgAomnv!3P8j#dQSjjNwo(U!?-`igt!}RD+m&7Kp;m)0Ib*9^5uy_V(d)9i-STt6 zQ5T&)^7sF%RWi2g)*$L94STc&y!wu;;`@$%EamGvUvo;f5B(8w58uPMKZ8{}_iCwM z@dM3)`C%@aYG_`cYEsihU&ocFu&G9x z8(}{dLU1L}5)?R8{3)j_hby0IzA-uZH=21Go8Cd??~MMZ@3gA)ogmiq(J8Hcv8jO8 zpnC6wrDMz>;^6GD?@ObWvc*s(AYn0lIcOL$a%cx3gNvjfOY@A%$y#266>WNGxA<IfX8j zFA|0*PG^~;n(j5QM&Y$sPdC4si-h^R86TKt8JlvqV4?=kXQi6Qxy)-@$0F^_P+|o_ zO2vc}mfxi(aSnnvXK=Bu`8j%`4RFVH>alqGpj^(hZaC#g z=h$B0K_1_OZDIY!do$6ou7`f+5pSDxz}zhQs1i-gHk6|4i!4}(h@$`h3nqx&tA1)K^${~|Fp#R-0Ha? zjP*~U+aVsupugZXF4@Wx$E9yxZwLHta3hp@$ zDR+yw=PxKhu6qtPOWI{_3~P5?WscD{z>0|}Yp!>mUf~AhgXW$2?k1Gl*bh?CJDM_=4Hr(g=X-*$+EAw!t>7}5*eLJ>>8vY}AYKFH4<`X@qrMl+V#cJ~* z%g4w8#qe2zIQZ*x@Q)Yy|{`rKwd{%?1qPldcRdky}A zDpWP|aY4P!%irAfk`J!2H9d#=W_+i)hgRu3N3 zS{)M_`&FjQIqyo|Wy+M@v-4oOt3BDU#?;ikFO z0=~Kj)i=YL?S+zRK-D zxH}h3M*@R(bZ5Ju9{b*G*sF_isb{|X3~Q!)4}f_v;X!wDrIROe1#UI>%-L+NRcYqw ziY%UiG{G7EdGqml)WD(MGjQ7_T7OUPW!?d9KzPG=9`+(2aUQ}r82c2=7JPJqx0u@+ zyJ9jJYmimb*Q(|Md)l`T>?3co{pJT zgKkWBeu)K&rUSk826{WhknHH}Vq9RJc<*7Jjr;&2(?W1unw%?bxi)+!bqq zsbBNw#`rVlQ_Wi6CFb&W82!tYNW`Q@H}Twz8$RlzsjrPm<0kIIEE{lQpo8Z*^u?Os zAARtkRX6-*9}By4~nX0 zo^N0gqfs^xsAoKuqa3q>h!;ifE zvKK22byFtdZ&{w-+#_T-GridvHv@+wy{QX~yEpS-n_E{nwOKy5c`rPaSc9R_rNx)^&WGxDs-=#i-Z8!h-pN5oN$s^H}E_;ce}xFdTI`F zx^J1Jc3)B4fd!Gap6Mngrq{Y0 z`v$P4_9a$YjMFtVMb}fO(A(0?$qw<{>b{Zexw#hnudio}dBhjfB&KHYG#H1v3pK4- z@n3*86YYQ*d*dQoJPd|mWeizxr25jM5^Z;^SWll^XC<~ZWz}BUYJok3&>c)!D9;BS(83Etw zgtsLVH%%wIGoYR>6`P{J%nqDJbzgF`@|wBbJb!y|yD*}SrLSucg(MNf(b$wNW${9u zVi=2?8f#z`pq_ccrw?9vfpQpl9R{~f>J#qFt4XvfqKR83%`4k}_-_N_fcSP&L8drs!?4A-w2%Aj+&m%n@&rr-cW5x%Tq8|Jmzb3{c0@ zVi{Ho@qM(Ds}V2L2AUh^k9-QRgJ5-YoD@j2r^d_uQ|*kKxQ~IDM=1kFz}<5re>V?r zU0?!f?P>U;iM(f;=^FVw4WRrfy*|r-#T<1LOir6s=59E{ zhAm6TS996>!mIq4d+KK2vJTj9{Dp5ccXy=L!NPp=6mzN{Ee(!vwL5v>Ky+mnxL>K| zt!VPWS_Tk5Giv~*Pg%W7mR(CC*8F|7r1HHR=8y!^s_fp4F7O<;Ei!LDSKp-B^<@tK zq(rAWJ@)>`$FRs??|@u`EgQ~v?>)HwnL%^DzW?LxAJyFQ(M`9TH#0P>a?ZW_Q}$I( znpgRLa+U8@RyjLg zI`z8gotNdb@}4;a9+v+y9UQOO*RI>qTQBNu_AhUZ;VC$+=IP^ZK4@afU|nnhh~Ys4 zRLt*V0kJ;XuNo<-*W$uaOqWWH`PkKHM{)GW+ul6WYF+{U-P}D_+j9v9DNy@S3T}Bb z_2s8H2#M<*F;ZyaPGzC!)OIIXab*%N7&f&CcJrgrbPvIpn=8zIG1E9r^!C_-AEvPz z@9kkugCEp?n%aqa6aTZj1Z)JV@lP*iQa(0U?zq=omSCp56>P>mT7HbF8hGZQHa1Tk z;MO{99$tcz1(>({5eaN|PjvVhJHB7UuP@@~IJEwlpFr=%JZIDjVK+pQw7}1L7{Hik zj*xWN;Of3=dp)*|K&^QZ8fM~13VO1?Ah_wv=407@AYM?xI)qu8PtU?_D;Vdx8wd|e z!OD6a9GTn*gxxwX;;KAcXy6$<(VM#5yeovR7oM{;$GI1nLH*ArlS(j`P!4)71(9Or)16*l8grc;Gb;F_4?_{mv? 
zDIbfYSDb&cOF?N$c2-4|m@!{esMJK+JaTK>cXoOl)%51*LycORx7ZE)x2L0`Y5YLjHa2V<%ThjjJ;1ND|AXbYaD1exH4><+D#d3%tI zw`yC2$RxUb(AXoy6PEnCHum>wyEaw8>tg*PoWY&btpHk(X{$rWOs4?ux4B7Rc5c0R zeNU8{{nA{IDo5~8vNk)MY|qgy{=mWw2b{O>IBb%qg|D zS85Fj187_^uCHF}3#N=-Ci>~!FQj~-*%)6RZX&RvXm*5dhY{c#5$Sah~)PY9yg zqOsuM(&azRg?J(0KXE?Bg`u+LhU>J(mU^K20*JP9xSo zkl0fDI=Nkt{Wxu5MbiW&`vuaVW)&T08K=!hJH202p!?_bPj=UeZEQS{5fvPsoMa$6 z3F?XnCYVnY7WEAdtsG2PO*q&vfWGayWT2Wy2!REo$nEVcRkH9=Kn{zL}k< zNOR~Q)4Jw%L)96Fb5PH~6H!x7!=YBlw@RC!xZ75cK&_y1)OXRsww$T40Uh4(#Z!%# zmM+tkJQTrH4G;0{-Z*c>!=~fAyYKf^lFa~M-bH~6cd2`5*7Kr4K=nJqoK+Dxa~}bQ zH>Yd51kid0b~@&QyeA!8ThlMp^R_(^cKhKj!&A@{v`*qDA90KA@J)L!Y$``xTC@_u zpm9NS!iAY@31f45=R1^5%@i6X^3QeE&2mM{G%t&waSbtPr&mz0S~6Wmr))82jH>|I zzSM}~w%AO`Zw?d9H2f3oIaZrnh~SYlhGL>rrLd1}cFL|em9Zd7cSkmN(iv!sF3!9a zyj@wF?pEl=1zC7|VwWYpHgBa^tD({ie3@=5j)!&$V52;cHm{X-Psje`i)GRyv!kkc z3EM&n14Rml1m7v!pDqg=&PY@GSXmg68H-0ZJJ<%ckMVI;vzB3Y==veK3$1r%*ah(M z*dC7=gerFIy8i3G|NZi+&(*0M{~E#Kj3+*E?dm$2;m3StrUEerZ|a%Vcd*n!4H4H0{!&xZtg{n1Vv_VoqB8LsLXpp_V=LplHBD4to&Epa{hfXDy*Ha!9LRj{eee5y-T{k3;i^h!tLpSgMP|F58_JM{2$M|kjb+#l=G8#`r!T2fG&`N(UZ*pI;hxv_we=D*I%TFldby{yE1v^&VWTgB)h zOUG$&l(g?2+RfR%os2ue|3o(A$n@P&Zymam_W2J7_;v1t>{$}r2t=-2hFEux1t)ND z;DzTV9>n;mM;f{|!e6(iOGo(BG(L8Sah$UW?lfQysPx^0*qwKWo&X;%M1CQ$T|uUE z%^7$16wl2aZfuVI;M3F81~6z}X0VSvQ;~+EuLGap7b#>UR`&e|7=-rnak%K>*C28? zD*Ch&W`nAZRbb>le6>jo{S8+$U4CLy5q6^n%7c;FCH2{@D|+9 z;{-_1*YS^K3BDaik6cm>+=c8dxntEDjEhauCo=eg8hisF!Ov?r{lfa>3HG8^V8Gc(23X=WR8lETcLKCiQ_+jXoXW)gqLoky}GQ%aIt~NO`GdNhr*Arnv z#EKe5l!0aFtZbbHCSw(v6ltu?0&<}Vl!Z#L#O*?mB;><7VrfJh+J^o^GPJ=uMBHPQ zLiL`5Q&V`s8v?l-6MNn)Pm4T>5>BiG$KYSi!|rHh97a$7FH0AA!M|?tcD}nvsq>=P zA$k>P>n}G!=0^Ba{)+HdL{apOA>Cv0_d?$1uL^%v_^rm?@{d6IE%2xDU7^TCfkmT> zYupQ2OrgX@{qBbJ+S=Nb@{OLIRNm-e?&RH+P3x_8DxZVme>{F@8}mcknE#n^ii>r( z^?x07e|dcGcIJDxGyl^|>_9oKzqoa8Kms>a>$8|JznCz;zn`VrXLHVtLIL_#e&F;DDlu}uo%cy^WRKfB8He?>SIw*4+b!R^S4!R+QoDW$x%*SyN_ z!1^|iKPUV-iC-jd?Uiz>`2iBRO&Y%;d_#C^Z}Tl+vHg^93f~mIWE$M^zX6N;UirQ| zc>DXhuvc?#37x~DoJ!pfzeAZHe|Zb@%iYY+65U=YubMw0fjW&}zmxfTf%*DowPY#P zNtNB8@Z;BaW%)}FrKXgy@p}zF{3`Rq{Tx3_x7$JWTi4-7v2xyG1RtLhfwFF#KrT`8~f6G&tuu<4O` z=7$&9@?lUPfdf^KU%h^Q2E}^rra>Sv|L$Fs{Z<1>p~ z5}8v_n*6Vxf#H2Ugl2&9MWAYa+}|R#q|M~-;#gLLJ&$LLsq8kXMb^%6pws>%Q2)%d z*Vr#^RZ;=GcKqTgXTSiRwX2+be>p<(tP?RU8lw9%CBYQg!HZ)V$u4_>G$@?h!8Ir*I?l_i1^e>19 z(=h#7M}M&&(w}+$>K*svJXb$F@1E|IiK>4GSp7!#T@d{rwxj4@bN)|w%aJ9S7bbf= z{{AL-xx=*Imyz{DlL-SOR1iiZtAVo>pt2wu#+GGcU;tqdfU;SDOb{j)K$9v1$Tk=o zo#uiX4x&N09;g6>(E%a1z`_|sgRp`h!~_rx!m#{As$2H+Gc!2wLUe;@5GK_v*!%(# zBh@Xm2p3kU<4AQ2Ey4ww`bZ8JTDS#TybcPtz{+q^;s_KbpnU8IO}ii(gegtC$mI+y zo1)X`c^X!pV9N*i$`?X;8e|@_dq8}cTly?CKwO5qC%VApC`=qy4#Ymu0?S|e4xvwq zLCSBKJdB2^dw`}6=581bQ)dfR_YzG#ET7?0{|QY!EHA)lbn`u+>LZ}^Z#4a|^h-#8 n22>x*57cmgr8yXlZht3K-&`ED!B154VDSW_Ni`2<54yPk8Or Date: Mon, 30 Oct 2023 10:21:00 -1000 Subject: [PATCH 077/144] [lldb] Part 2 of 2 - Refactor `CommandObject::DoExecute(...)` return `void` (not `bool`) (#69991) [lldb] Part 2 of 2 - Refactor `CommandObject::DoExecute(...)` to return `void` instead of ~~`bool`~~ Justifications: - The code doesn't ultimately apply the `true`/`false` return values. - The methods already pass around a `CommandReturnObject`, typically with a `result` parameter. - Each command return object already contains: - A more precise status - The error code(s) that apply to that status Part 1 refactors the `CommandObject::Execute(...)` method. 
---
 lldb/include/lldb/Interpreter/CommandObject.h | 4 +-
 lldb/source/API/SBCommandInterpreter.cpp | 6 +-
 lldb/source/Commands/CommandObjectApropos.cpp | 4 +-
 lldb/source/Commands/CommandObjectApropos.h | 2 +-
 .../Commands/CommandObjectBreakpoint.cpp | 105 ++++-----
 .../CommandObjectBreakpointCommand.cpp | 25 +-
 .../source/Commands/CommandObjectCommands.cpp | 151 ++++++------
 .../Commands/CommandObjectDWIMPrint.cpp | 10 +-
 lldb/source/Commands/CommandObjectDWIMPrint.h | 2 +-
 .../Commands/CommandObjectDiagnostics.cpp | 8 +-
 .../Commands/CommandObjectDisassemble.cpp | 12 +-
 .../Commands/CommandObjectDisassemble.h | 2 +-
 .../Commands/CommandObjectExpression.cpp | 15 +-
 .../source/Commands/CommandObjectExpression.h | 2 +-
 lldb/source/Commands/CommandObjectFrame.cpp | 76 +++----
 lldb/source/Commands/CommandObjectGUI.cpp | 4 +-
 lldb/source/Commands/CommandObjectGUI.h | 2 +-
 lldb/source/Commands/CommandObjectHelp.cpp | 8 +-
 lldb/source/Commands/CommandObjectHelp.h | 2 +-
 lldb/source/Commands/CommandObjectLanguage.h | 2 +-
 lldb/source/Commands/CommandObjectLog.cpp | 42 ++--
 lldb/source/Commands/CommandObjectMemory.cpp | 147 ++++++------
 .../Commands/CommandObjectMemoryTag.cpp | 30 ++-
 .../source/Commands/CommandObjectPlatform.cpp | 102 ++++-----
 lldb/source/Commands/CommandObjectPlugin.cpp | 6 +-
 lldb/source/Commands/CommandObjectProcess.cpp | 93 +++-----
 lldb/source/Commands/CommandObjectQuit.cpp | 12 +-
 lldb/source/Commands/CommandObjectQuit.h | 2 +-
 .../Commands/CommandObjectRegexCommand.cpp | 10 +-
 .../Commands/CommandObjectRegexCommand.h | 2 +-
 .../source/Commands/CommandObjectRegister.cpp | 14 +-
 lldb/source/Commands/CommandObjectScript.cpp | 12 +-
 lldb/source/Commands/CommandObjectScript.h | 2 +-
 lldb/source/Commands/CommandObjectSession.cpp | 6 +-
 .../source/Commands/CommandObjectSettings.cpp | 91 +++-----
 lldb/source/Commands/CommandObjectSource.cpp | 33 ++-
 lldb/source/Commands/CommandObjectStats.cpp | 13 +-
 lldb/source/Commands/CommandObjectTarget.cpp | 215 ++++++++----------
 lldb/source/Commands/CommandObjectThread.cpp | 140 +++++-------
 .../Commands/CommandObjectThreadUtil.cpp | 27 ++-
 .../source/Commands/CommandObjectThreadUtil.h | 4 +-
 lldb/source/Commands/CommandObjectTrace.cpp | 21 +-
 lldb/source/Commands/CommandObjectType.cpp | 108 ++++-----
 lldb/source/Commands/CommandObjectVersion.cpp | 3 +-
 lldb/source/Commands/CommandObjectVersion.h | 2 +-
 .../Commands/CommandObjectWatchpoint.cpp | 84 +++----
 .../CommandObjectWatchpointCommand.cpp | 29 +--
 .../ItaniumABI/ItaniumABILanguageRuntime.cpp | 3 +-
 .../AppleObjCRuntime/AppleObjCRuntimeV2.cpp | 22 +-
 .../Process/MacOSX-Kernel/ProcessKDP.cpp | 11 +-
 .../Process/gdb-remote/ProcessGDBRemote.cpp | 27 +--
 .../Process/minidump/ProcessMinidump.cpp | 8 +-
 .../DarwinLog/StructuredDataDarwinLog.cpp | 12 +-
 .../intel-pt/CommandObjectTraceStartIntelPT.h | 2 +-
 .../ctf/CommandObjectThreadTraceExportCTF.cpp | 6 +-
 .../ctf/CommandObjectThreadTraceExportCTF.h | 2 +-
 .../Interpreter/TestCommandPaths.cpp | 3 +-
 57 files changed, 747 insertions(+), 1041 deletions(-)

diff --git a/lldb/include/lldb/Interpreter/CommandObject.h b/lldb/include/lldb/Interpreter/CommandObject.h
index 004f5d42f1e44e..7b427de0264f75 100644
--- a/lldb/include/lldb/Interpreter/CommandObject.h
+++ b/lldb/include/lldb/Interpreter/CommandObject.h
@@ -401,7 +401,7 @@ class CommandObjectParsed : public CommandObject {
   void Execute(const char *args_string, CommandReturnObject &result) override;
 
 protected:
-  virtual bool DoExecute(Args &command, CommandReturnObject &result) = 0;
+  virtual void DoExecute(Args &command, CommandReturnObject &result) = 0;
 
   bool WantsRawCommandString() override { return false; }
 };
@@ -418,7 +418,7 @@ class CommandObjectRaw : public CommandObject {
   void Execute(const char *args_string, CommandReturnObject &result) override;
 
 protected:
-  virtual bool DoExecute(llvm::StringRef command,
+  virtual void DoExecute(llvm::StringRef command,
                          CommandReturnObject &result) = 0;
 
   bool WantsRawCommandString() override { return true; }
diff --git a/lldb/source/API/SBCommandInterpreter.cpp b/lldb/source/API/SBCommandInterpreter.cpp
index d275da933919e5..c3cbb00145ed3e 100644
--- a/lldb/source/API/SBCommandInterpreter.cpp
+++ b/lldb/source/API/SBCommandInterpreter.cpp
@@ -70,13 +70,11 @@ class CommandPluginInterfaceImplementation : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     SBCommandReturnObject sb_return(result);
     SBCommandInterpreter sb_interpreter(&m_interpreter);
     SBDebugger debugger_sb(m_interpreter.GetDebugger().shared_from_this());
-    bool ret = m_backend->DoExecute(debugger_sb, command.GetArgumentVector(),
-                                    sb_return);
-    return ret;
+    m_backend->DoExecute(debugger_sb, command.GetArgumentVector(), sb_return);
   }
   std::shared_ptr<lldb::SBCommandPluginInterface> m_backend;
   std::optional<std::string> m_auto_repeat_command;
diff --git a/lldb/source/Commands/CommandObjectApropos.cpp b/lldb/source/Commands/CommandObjectApropos.cpp
index c6680f8b140d16..88c214d4fc56ab 100644
--- a/lldb/source/Commands/CommandObjectApropos.cpp
+++ b/lldb/source/Commands/CommandObjectApropos.cpp
@@ -38,7 +38,7 @@ CommandObjectApropos::CommandObjectApropos(CommandInterpreter &interpreter)
 
 CommandObjectApropos::~CommandObjectApropos() = default;
 
-bool CommandObjectApropos::DoExecute(Args &args, CommandReturnObject &result) {
+void CommandObjectApropos::DoExecute(Args &args, CommandReturnObject &result) {
   const size_t argc = args.GetArgumentCount();
 
   if (argc == 1) {
@@ -90,6 +90,4 @@ bool CommandObjectApropos::DoExecute(Args &args, CommandReturnObject &result) {
   } else {
     result.AppendError("'apropos' must be called with exactly one argument.\n");
   }
-
-  return result.Succeeded();
 }
diff --git a/lldb/source/Commands/CommandObjectApropos.h b/lldb/source/Commands/CommandObjectApropos.h
index 042753f240328b..f43420c1815d90 100644
--- a/lldb/source/Commands/CommandObjectApropos.h
+++ b/lldb/source/Commands/CommandObjectApropos.h
@@ -23,7 +23,7 @@ class CommandObjectApropos : public CommandObjectParsed {
   ~CommandObjectApropos() override;
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override;
+  void DoExecute(Args &command, CommandReturnObject &result) override;
 };
 
 } // namespace lldb_private
diff --git a/lldb/source/Commands/CommandObjectBreakpoint.cpp b/lldb/source/Commands/CommandObjectBreakpoint.cpp
index 18cbb9528b717a..e1d1c5e42c32a0 100644
--- a/lldb/source/Commands/CommandObjectBreakpoint.cpp
+++ b/lldb/source/Commands/CommandObjectBreakpoint.cpp
@@ -528,7 +528,7 @@ class CommandObjectBreakpointSet : public CommandObjectParsed {
   };
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target &target = GetSelectedOrDummyTarget(m_dummy_options.m_use_dummy);
 
     // The following are the various types of breakpoints that could be set:
-577,12 +577,12 @@ class CommandObjectBreakpointSet : public CommandObjectParsed { if (num_files == 0) { if (!GetDefaultFile(target, file, result)) { result.AppendError("No file supplied and no default file available."); - return false; + return; } } else if (num_files > 1) { result.AppendError("Only one file at a time is allowed for file and " "line breakpoints."); - return false; + return; } else file = m_options.m_filenames.GetFileSpecAtIndex(0); @@ -613,7 +613,7 @@ class CommandObjectBreakpointSet : public CommandObjectParsed { } else { result.AppendError("Only one shared library can be specified for " "address breakpoints."); - return false; + return; } break; } @@ -647,7 +647,7 @@ class CommandObjectBreakpointSet : public CommandObjectParsed { result.AppendWarning( "Function name regex does not accept glob patterns."); } - return false; + return; } bp_sp = target.CreateFuncRegexBreakpoint( @@ -664,7 +664,7 @@ class CommandObjectBreakpointSet : public CommandObjectParsed { if (!GetDefaultFile(target, file, result)) { result.AppendError( "No files provided and could not find default file."); - return false; + return; } else { m_options.m_filenames.Append(file); } @@ -675,7 +675,7 @@ class CommandObjectBreakpointSet : public CommandObjectParsed { result.AppendErrorWithFormat( "Source text regular expression could not be compiled: \"%s\"", llvm::toString(std::move(err)).c_str()); - return false; + return; } bp_sp = target.CreateSourceRegexBreakpoint( &(m_options.m_modules), &(m_options.m_filenames), @@ -693,7 +693,7 @@ class CommandObjectBreakpointSet : public CommandObjectParsed { "Error setting extra exception arguments: %s", precond_error.AsCString()); target.RemoveBreakpointByID(bp_sp->GetID()); - return false; + return; } } break; case eSetTypeScripted: { @@ -707,7 +707,7 @@ class CommandObjectBreakpointSet : public CommandObjectParsed { result.AppendErrorWithFormat( "Error setting extra exception arguments: %s", error.AsCString()); target.RemoveBreakpointByID(bp_sp->GetID()); - return false; + return; } } break; default: @@ -726,7 +726,7 @@ class CommandObjectBreakpointSet : public CommandObjectParsed { result.AppendErrorWithFormat("Invalid breakpoint name: %s", name.c_str()); target.RemoveBreakpointByID(bp_sp->GetID()); - return false; + return; } } } @@ -753,8 +753,6 @@ class CommandObjectBreakpointSet : public CommandObjectParsed { } else if (!bp_sp) { result.AppendError("Breakpoint creation failed: No breakpoint created."); } - - return result.Succeeded(); } private: @@ -835,7 +833,7 @@ class CommandObjectBreakpointModify : public CommandObjectParsed { Options *GetOptions() override { return &m_options; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target &target = GetSelectedOrDummyTarget(m_dummy_opts.m_use_dummy); std::unique_lock lock; @@ -868,8 +866,6 @@ class CommandObjectBreakpointModify : public CommandObjectParsed { } } } - - return result.Succeeded(); } private: @@ -906,7 +902,7 @@ class CommandObjectBreakpointEnable : public CommandObjectParsed { } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target &target = GetSelectedOrDummyTarget(); std::unique_lock lock; @@ -918,7 +914,7 @@ class CommandObjectBreakpointEnable : public CommandObjectParsed { if (num_breakpoints == 0) { result.AppendError("No breakpoints exist to be enabled."); - return false; + 
return; } if (command.empty()) { @@ -963,8 +959,6 @@ class CommandObjectBreakpointEnable : public CommandObjectParsed { result.SetStatus(eReturnStatusSuccessFinishNoResult); } } - - return result.Succeeded(); } }; @@ -1020,7 +1014,7 @@ the second re-enables the first location."); } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target &target = GetSelectedOrDummyTarget(); std::unique_lock lock; target.GetBreakpointList().GetListMutex(lock); @@ -1030,7 +1024,7 @@ the second re-enables the first location."); if (num_breakpoints == 0) { result.AppendError("No breakpoints exist to be disabled."); - return false; + return; } if (command.empty()) { @@ -1076,8 +1070,6 @@ the second re-enables the first location."); result.SetStatus(eReturnStatusSuccessFinishNoResult); } } - - return result.Succeeded(); } }; @@ -1168,7 +1160,7 @@ class CommandObjectBreakpointList : public CommandObjectParsed { }; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target &target = GetSelectedOrDummyTarget(m_options.m_use_dummy); const BreakpointList &breakpoints = @@ -1181,7 +1173,7 @@ class CommandObjectBreakpointList : public CommandObjectParsed { if (num_breakpoints == 0) { result.AppendMessage("No breakpoints currently set."); result.SetStatus(eReturnStatusSuccessFinishNoResult); - return true; + return; } Stream &output_stream = result.GetOutputStream(); @@ -1216,8 +1208,6 @@ class CommandObjectBreakpointList : public CommandObjectParsed { result.AppendError("Invalid breakpoint ID."); } } - - return result.Succeeded(); } private: @@ -1289,7 +1279,7 @@ class CommandObjectBreakpointClear : public CommandObjectParsed { }; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target &target = GetSelectedOrDummyTarget(); // The following are the various types of breakpoints that could be @@ -1310,7 +1300,7 @@ class CommandObjectBreakpointClear : public CommandObjectParsed { // Early return if there's no breakpoint at all. if (num_breakpoints == 0) { result.AppendError("Breakpoint clear: No breakpoint cleared."); - return result.Succeeded(); + return; } // Find matching breakpoints and delete them. @@ -1357,8 +1347,6 @@ class CommandObjectBreakpointClear : public CommandObjectParsed { } else { result.AppendError("Breakpoint clear: No breakpoint cleared."); } - - return result.Succeeded(); } private: @@ -1445,7 +1433,7 @@ class CommandObjectBreakpointDelete : public CommandObjectParsed { }; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target &target = GetSelectedOrDummyTarget(m_options.m_use_dummy); result.Clear(); @@ -1458,7 +1446,7 @@ class CommandObjectBreakpointDelete : public CommandObjectParsed { if (num_breakpoints == 0) { result.AppendError("No breakpoints exist to be deleted."); - return false; + return; } // Handle the delete all breakpoints case: @@ -1475,7 +1463,7 @@ class CommandObjectBreakpointDelete : public CommandObjectParsed { (uint64_t)num_breakpoints, num_breakpoints > 1 ? 
"s" : ""); } result.SetStatus(eReturnStatusSuccessFinishNoResult); - return result.Succeeded(); + return; } // Either we have some kind of breakpoint specification(s), @@ -1491,7 +1479,7 @@ class CommandObjectBreakpointDelete : public CommandObjectParsed { command, &target, result, &excluded_bp_ids, BreakpointName::Permissions::PermissionKinds::deletePerm); if (!result.Succeeded()) - return false; + return; } for (auto breakpoint_sp : breakpoints.Breakpoints()) { @@ -1504,14 +1492,14 @@ class CommandObjectBreakpointDelete : public CommandObjectParsed { } if (valid_bp_ids.GetSize() == 0) { result.AppendError("No disabled breakpoints."); - return false; + return; } } else { CommandObjectMultiwordBreakpoint::VerifyBreakpointOrLocationIDs( command, &target, result, &valid_bp_ids, BreakpointName::Permissions::PermissionKinds::deletePerm); if (!result.Succeeded()) - return false; + return; } int delete_count = 0; @@ -1542,7 +1530,6 @@ class CommandObjectBreakpointDelete : public CommandObjectParsed { "%d breakpoints deleted; %d breakpoint locations disabled.\n", delete_count, disable_count); result.SetStatus(eReturnStatusSuccessFinishNoResult); - return result.Succeeded(); } private: @@ -1709,12 +1696,12 @@ class CommandObjectBreakpointNameConfigure : public CommandObjectParsed { Options *GetOptions() override { return &m_option_group; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); if (argc == 0) { result.AppendError("No names provided."); - return false; + return; } Target &target = GetSelectedOrDummyTarget(false); @@ -1728,7 +1715,7 @@ class CommandObjectBreakpointNameConfigure : public CommandObjectParsed { if (!BreakpointID::StringIsBreakpointName(entry.ref(), error)) { result.AppendErrorWithFormat("Invalid breakpoint name: %s - %s", entry.c_str(), error.AsCString()); - return false; + return; } } // Now configure them, we already pre-checked the names so we don't need to @@ -1741,7 +1728,7 @@ class CommandObjectBreakpointNameConfigure : public CommandObjectParsed { if (!bp_sp) { result.AppendErrorWithFormatv("Could not find specified breakpoint {0}", bp_id); - return false; + return; } } @@ -1765,7 +1752,6 @@ class CommandObjectBreakpointNameConfigure : public CommandObjectParsed { m_bp_opts.GetBreakpointOptions(), m_access_options.GetPermissions()); } - return true; } private: @@ -1806,10 +1792,10 @@ class CommandObjectBreakpointNameAdd : public CommandObjectParsed { Options *GetOptions() override { return &m_option_group; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { if (!m_name_options.m_name.OptionWasSet()) { result.AppendError("No name option provided."); - return false; + return; } Target &target = @@ -1823,7 +1809,7 @@ class CommandObjectBreakpointNameAdd : public CommandObjectParsed { size_t num_breakpoints = breakpoints.GetSize(); if (num_breakpoints == 0) { result.AppendError("No breakpoints, cannot add names."); - return false; + return; } // Particular breakpoint selected; disable that breakpoint. 
@@ -1835,7 +1821,7 @@ class CommandObjectBreakpointNameAdd : public CommandObjectParsed { if (result.Succeeded()) { if (valid_bp_ids.GetSize() == 0) { result.AppendError("No breakpoints specified, cannot add names."); - return false; + return; } size_t num_valid_ids = valid_bp_ids.GetSize(); const char *bp_name = m_name_options.m_name.GetCurrentValue(); @@ -1848,8 +1834,6 @@ class CommandObjectBreakpointNameAdd : public CommandObjectParsed { target.AddNameToBreakpoint(bp_sp, bp_name, error); } } - - return true; } private: @@ -1889,10 +1873,10 @@ class CommandObjectBreakpointNameDelete : public CommandObjectParsed { Options *GetOptions() override { return &m_option_group; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { if (!m_name_options.m_name.OptionWasSet()) { result.AppendError("No name option provided."); - return false; + return; } Target &target = @@ -1906,7 +1890,7 @@ class CommandObjectBreakpointNameDelete : public CommandObjectParsed { size_t num_breakpoints = breakpoints.GetSize(); if (num_breakpoints == 0) { result.AppendError("No breakpoints, cannot delete names."); - return false; + return; } // Particular breakpoint selected; disable that breakpoint. @@ -1918,7 +1902,7 @@ class CommandObjectBreakpointNameDelete : public CommandObjectParsed { if (result.Succeeded()) { if (valid_bp_ids.GetSize() == 0) { result.AppendError("No breakpoints specified, cannot delete names."); - return false; + return; } ConstString bp_name(m_name_options.m_name.GetCurrentValue()); size_t num_valid_ids = valid_bp_ids.GetSize(); @@ -1929,8 +1913,6 @@ class CommandObjectBreakpointNameDelete : public CommandObjectParsed { target.RemoveNameFromBreakpoint(bp_sp, bp_name); } } - - return true; } private: @@ -1955,7 +1937,7 @@ class CommandObjectBreakpointNameList : public CommandObjectParsed { Options *GetOptions() override { return &m_option_group; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target &target = GetSelectedOrDummyTarget(m_name_options.m_use_dummy.GetCurrentValue()); @@ -2005,7 +1987,6 @@ class CommandObjectBreakpointNameList : public CommandObjectParsed { } } } - return true; } private: @@ -2267,7 +2248,7 @@ class CommandObjectBreakpointRead : public CommandObjectParsed { }; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target &target = GetSelectedOrDummyTarget(); std::unique_lock lock; @@ -2281,7 +2262,7 @@ class CommandObjectBreakpointRead : public CommandObjectParsed { if (!error.Success()) { result.AppendError(error.AsCString()); - return false; + return; } Stream &output_stream = result.GetOutputStream(); @@ -2302,7 +2283,6 @@ class CommandObjectBreakpointRead : public CommandObjectParsed { false); } } - return result.Succeeded(); } private: @@ -2383,7 +2363,7 @@ class CommandObjectBreakpointWrite : public CommandObjectParsed { }; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target &target = GetSelectedOrDummyTarget(); std::unique_lock lock; @@ -2397,7 +2377,7 @@ class CommandObjectBreakpointWrite : public CommandObjectParsed { if (!result.Succeeded()) { result.SetStatus(eReturnStatusFailed); - return false; + return; } } FileSpec 
file_spec(m_options.m_filename); @@ -2408,7 +2388,6 @@ class CommandObjectBreakpointWrite : public CommandObjectParsed { result.AppendErrorWithFormat("error serializing breakpoints: %s.", error.AsCString()); } - return result.Succeeded(); } private: diff --git a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp index 921243829fc6b0..fefafcd94546a5 100644 --- a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp @@ -334,7 +334,7 @@ are no syntax errors may indicate that a function was declared but never called. }; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target &target = GetSelectedOrDummyTarget(m_options.m_use_dummy); const BreakpointList &breakpoints = target.GetBreakpointList(); @@ -342,7 +342,7 @@ are no syntax errors may indicate that a function was declared but never called. if (num_breakpoints == 0) { result.AppendError("No breakpoints exist to have commands added"); - return false; + return; } if (!m_func_options.GetName().empty()) { @@ -412,8 +412,6 @@ are no syntax errors may indicate that a function was declared but never called. CollectDataForBreakpointCommandCallback(m_bp_options_vec, result); } } - - return result.Succeeded(); } private: @@ -506,7 +504,7 @@ class CommandObjectBreakpointCommandDelete : public CommandObjectParsed { }; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target &target = GetSelectedOrDummyTarget(m_options.m_use_dummy); const BreakpointList &breakpoints = target.GetBreakpointList(); @@ -514,13 +512,13 @@ class CommandObjectBreakpointCommandDelete : public CommandObjectParsed { if (num_breakpoints == 0) { result.AppendError("No breakpoints exist to have commands deleted"); - return false; + return; } if (command.empty()) { result.AppendError( "No breakpoint specified from which to delete the commands"); - return false; + return; } BreakpointIDList valid_bp_ids; @@ -544,7 +542,7 @@ class CommandObjectBreakpointCommandDelete : public CommandObjectParsed { result.AppendErrorWithFormat("Invalid breakpoint ID: %u.%u.\n", cur_bp_id.GetBreakpointID(), cur_bp_id.GetLocationID()); - return false; + return; } } else { bp->ClearCallback(); @@ -552,7 +550,6 @@ class CommandObjectBreakpointCommandDelete : public CommandObjectParsed { } } } - return result.Succeeded(); } private: @@ -586,7 +583,7 @@ class CommandObjectBreakpointCommandList : public CommandObjectParsed { ~CommandObjectBreakpointCommandList() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = &GetSelectedTarget(); const BreakpointList &breakpoints = target->GetBreakpointList(); @@ -594,13 +591,13 @@ class CommandObjectBreakpointCommandList : public CommandObjectParsed { if (num_breakpoints == 0) { result.AppendError("No breakpoints exist for which to list commands"); - return false; + return; } if (command.empty()) { result.AppendError( "No breakpoint specified for which to list the commands"); - return false; + return; } BreakpointIDList valid_bp_ids; @@ -624,7 +621,7 @@ class CommandObjectBreakpointCommandList : public CommandObjectParsed { result.AppendErrorWithFormat("Invalid breakpoint ID: %u.%u.\n", 
cur_bp_id.GetBreakpointID(), cur_bp_id.GetLocationID()); - return false; + return; } } @@ -661,8 +658,6 @@ class CommandObjectBreakpointCommandList : public CommandObjectParsed { } } } - - return result.Succeeded(); } }; diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp index 656ace223b5f15..74d97b0db16cbe 100644 --- a/lldb/source/Commands/CommandObjectCommands.cpp +++ b/lldb/source/Commands/CommandObjectCommands.cpp @@ -129,12 +129,12 @@ class CommandObjectCommandsSource : public CommandObjectParsed { OptionValueBoolean m_cmd_relative_to_command_file; }; - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { if (command.GetArgumentCount() != 1) { result.AppendErrorWithFormat( "'%s' takes exactly one executable filename argument.\n", GetCommandName().str().c_str()); - return false; + return; } FileSpec source_dir = {}; @@ -144,7 +144,7 @@ class CommandObjectCommandsSource : public CommandObjectParsed { result.AppendError("command source -C can only be specified " "from a command file"); result.SetStatus(eReturnStatusFailed); - return false; + return; } } @@ -155,7 +155,7 @@ class CommandObjectCommandsSource : public CommandObjectParsed { result.AppendError("command source -C can only be used " "with a relative path."); result.SetStatus(eReturnStatusFailed); - return false; + return; } cmd_file.MakeAbsolute(source_dir); } @@ -186,7 +186,6 @@ class CommandObjectCommandsSource : public CommandObjectParsed { } m_interpreter.HandleCommandsFromFile(cmd_file, options, result); - return result.Succeeded(); } CommandOptions m_options; @@ -384,11 +383,11 @@ rather than using a positional placeholder:" ~CommandObjectCommandsAlias() override = default; protected: - bool DoExecute(llvm::StringRef raw_command_line, + void DoExecute(llvm::StringRef raw_command_line, CommandReturnObject &result) override { if (raw_command_line.empty()) { result.AppendError("'command alias' requires at least two arguments"); - return false; + return; } ExecutionContext exe_ctx = GetCommandInterpreter().GetExecutionContext(); @@ -399,14 +398,14 @@ rather than using a positional placeholder:" if (args_with_suffix.HasArgs()) if (!ParseOptionsAndNotify(args_with_suffix.GetArgs(), result, m_option_group, exe_ctx)) - return false; + return; llvm::StringRef raw_command_string = args_with_suffix.GetRawPart(); Args args(raw_command_string); if (args.GetArgumentCount() < 2) { result.AppendError("'command alias' requires at least two arguments"); - return false; + return; } // Get the alias command. @@ -418,7 +417,7 @@ rather than using a positional placeholder:" result.AppendWarning("if trying to pass options to 'command alias' add " "a -- at the end of the options"); } - return false; + return; } // Strip the new alias name off 'raw_command_string' (leave it on args, @@ -431,7 +430,7 @@ rather than using a positional placeholder:" raw_command_string = raw_command_string.substr(pos); } else { result.AppendError("Error parsing command string. No alias created."); - return false; + return; } // Verify that the command is alias-able. 
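The alias hunks just below show the other side of the migration: helpers such as HandleAliasingRawCommand and HandleAliasingNormalCommand still return bool for their own callers, but DoExecute no longer forwards that value, since the outcome is already recorded in the return object. Code that used to consume DoExecute's bool now asks the CommandReturnObject instead; the frame-variable hunk further down does exactly that to update statistics. A caller-side sketch (illustrative only; HandleFailure is a hypothetical stand-in, not an API touched by this patch):

    // Run the command, then query the return object for the outcome.
    cmd_obj->Execute(raw_command_line, result);
    if (!result.Succeeded())
      HandleFailure(result); // hypothetical follow-up on the error
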
@@ -439,7 +438,7 @@ rather than using a positional placeholder:" result.AppendErrorWithFormat( "'%s' is a permanent debugger command and cannot be redefined.\n", args[0].c_str()); - return false; + return; } if (m_interpreter.UserMultiwordCommandExists(alias_command)) { @@ -447,7 +446,7 @@ rather than using a positional placeholder:" "'%s' is a user container command and cannot be overwritten.\n" "Delete it first with 'command container delete'\n", args[0].c_str()); - return false; + return; } // Get CommandObject that is being aliased. The command name is read from @@ -462,17 +461,15 @@ rather than using a positional placeholder:" "'%s' does not begin with a valid command." " No alias created.", original_raw_command_string.str().c_str()); - return false; } else if (!cmd_obj->WantsRawCommandString()) { // Note that args was initialized with the original command, and has not // been updated to this point. Therefore can we pass it to the version of // Execute that does not need/expect raw input in the alias. - return HandleAliasingNormalCommand(args, result); + HandleAliasingNormalCommand(args, result); } else { - return HandleAliasingRawCommand(alias_command, raw_command_string, - *cmd_obj, result); + HandleAliasingRawCommand(alias_command, raw_command_string, *cmd_obj, + result); } - return result.Succeeded(); } bool HandleAliasingRawCommand(llvm::StringRef alias_command, @@ -653,13 +650,13 @@ class CommandObjectCommandsUnalias : public CommandObjectParsed { } protected: - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { CommandObject::CommandMap::iterator pos; CommandObject *cmd_obj; if (args.empty()) { result.AppendError("must call 'unalias' with a valid alias"); - return false; + return; } auto command_name = args[0].ref(); @@ -669,7 +666,7 @@ class CommandObjectCommandsUnalias : public CommandObjectParsed { "'%s' is not a known command.\nTry 'help' to see a " "current list of commands.\n", args[0].c_str()); - return false; + return; } if (m_interpreter.CommandExists(command_name)) { @@ -683,7 +680,7 @@ class CommandObjectCommandsUnalias : public CommandObjectParsed { "'%s' is a permanent debugger command and cannot be removed.\n", args[0].c_str()); } - return false; + return; } if (!m_interpreter.RemoveAlias(command_name)) { @@ -694,11 +691,10 @@ class CommandObjectCommandsUnalias : public CommandObjectParsed { else result.AppendErrorWithFormat("'%s' is not an existing alias.\n", args[0].c_str()); - return false; + return; } result.SetStatus(eReturnStatusSuccessFinishNoResult); - return result.Succeeded(); } }; @@ -742,14 +738,14 @@ class CommandObjectCommandsDelete : public CommandObjectParsed { } protected: - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { CommandObject::CommandMap::iterator pos; if (args.empty()) { result.AppendErrorWithFormat("must call '%s' with one or more valid user " "defined regular expression command names", GetCommandName().str().c_str()); - return false; + return; } auto command_name = args[0].ref(); @@ -761,18 +757,17 @@ class CommandObjectCommandsDelete : public CommandObjectParsed { &error_msg_stream, command_name, llvm::StringRef(), llvm::StringRef(), generate_upropos, generate_type_lookup); result.AppendError(error_msg_stream.GetString()); - return false; + return; } if (!m_interpreter.RemoveCommand(command_name)) { result.AppendErrorWithFormat( "'%s' is a permanent debugger 
command and cannot be removed.\n", args[0].c_str()); - return false; + return; } result.SetStatus(eReturnStatusSuccessFinishNoResult); - return true; } }; @@ -868,12 +863,12 @@ a number follows 'f':" } } - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); if (argc == 0) { result.AppendError("usage: 'command regex " "[s/// s/// ...]'\n"); - return false; + return; } Status error; @@ -914,8 +909,6 @@ a number follows 'f':" if (error.Fail()) { result.AppendError(error.AsCString()); } - - return result.Succeeded(); } Status AppendRegexSubstitution(const llvm::StringRef ®ex_sed, @@ -1126,7 +1119,7 @@ class CommandObjectPythonFunction : public CommandObjectRaw { bool WantsCompletion() override { return true; } protected: - bool DoExecute(llvm::StringRef raw_command_line, + void DoExecute(llvm::StringRef raw_command_line, CommandReturnObject &result) override { ScriptInterpreter *scripter = GetDebugger().GetScriptInterpreter(); @@ -1147,8 +1140,6 @@ class CommandObjectPythonFunction : public CommandObjectRaw { result.SetStatus(eReturnStatusSuccessFinishResult); } } - - return result.Succeeded(); } private: @@ -1222,7 +1213,7 @@ class CommandObjectScriptingObject : public CommandObjectRaw { } protected: - bool DoExecute(llvm::StringRef raw_command_line, + void DoExecute(llvm::StringRef raw_command_line, CommandReturnObject &result) override { ScriptInterpreter *scripter = GetDebugger().GetScriptInterpreter(); @@ -1243,8 +1234,6 @@ class CommandObjectScriptingObject : public CommandObjectRaw { result.SetStatus(eReturnStatusSuccessFinishResult); } } - - return result.Succeeded(); } private: @@ -1330,10 +1319,10 @@ class CommandObjectCommandsScriptImport : public CommandObjectParsed { bool silent = false; }; - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { if (command.empty()) { result.AppendError("command script import needs one or more arguments"); - return false; + return; } FileSpec source_dir = {}; @@ -1342,7 +1331,7 @@ class CommandObjectCommandsScriptImport : public CommandObjectParsed { if (!source_dir) { result.AppendError("command script import -c can only be specified " "from a command file"); - return false; + return; } } @@ -1371,8 +1360,6 @@ class CommandObjectCommandsScriptImport : public CommandObjectParsed { error.AsCString()); } } - - return result.Succeeded(); } CommandOptions m_options; @@ -1567,16 +1554,16 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, io_handler.SetIsDone(true); } - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { if (GetDebugger().GetScriptLanguage() != lldb::eScriptLanguagePython) { result.AppendError("only scripting language supported for scripted " "commands is currently Python"); - return false; + return; } if (command.GetArgumentCount() == 0) { result.AppendError("'command script add' requires at least one argument"); - return false; + return; } // Store the options in case we get multi-line input, also figure out the // default if not user supplied: @@ -1598,7 +1585,7 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, if (path_error.Fail()) { result.AppendErrorWithFormat("error in command path: %s", path_error.AsCString()); - return false; + return; } if (!m_container) { @@ -1617,7 +1604,7 @@ 
class CommandObjectCommandsScriptAdd : public CommandObjectParsed, if (m_options.m_class_name.empty() && m_options.m_funct_name.empty()) { m_interpreter.GetPythonCommandsFromIOHandler(" ", // Prompt *this); // IOHandlerDelegate - return result.Succeeded(); + return; } CommandObjectSP new_cmd_sp; @@ -1629,7 +1616,7 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, ScriptInterpreter *interpreter = GetDebugger().GetScriptInterpreter(); if (!interpreter) { result.AppendError("cannot find ScriptInterpreter"); - return false; + return; } auto cmd_obj_sp = interpreter->CreateScriptCommandObject( @@ -1637,7 +1624,7 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, if (!cmd_obj_sp) { result.AppendErrorWithFormatv("cannot create helper object for: " "'{0}'", m_options.m_class_name); - return false; + return; } new_cmd_sp.reset(new CommandObjectScriptingObject( @@ -1660,7 +1647,6 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, result.AppendErrorWithFormat("cannot add command: %s", llvm::toString(std::move(llvm_error)).c_str()); } - return result.Succeeded(); } CommandOptions m_options; @@ -1684,12 +1670,10 @@ class CommandObjectCommandsScriptList : public CommandObjectParsed { ~CommandObjectCommandsScriptList() override = default; - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { m_interpreter.GetHelp(result, CommandInterpreter::eCommandTypesUserDef); result.SetStatus(eReturnStatusSuccessFinishResult); - - return true; } }; @@ -1704,12 +1688,10 @@ class CommandObjectCommandsScriptClear : public CommandObjectParsed { ~CommandObjectCommandsScriptClear() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { m_interpreter.RemoveAllUser(); result.SetStatus(eReturnStatusSuccessFinishResult); - - return true; } }; @@ -1748,44 +1730,44 @@ class CommandObjectCommandsScriptDelete : public CommandObjectParsed { } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { llvm::StringRef root_cmd = command[0].ref(); size_t num_args = command.GetArgumentCount(); if (root_cmd.empty()) { result.AppendErrorWithFormat("empty root command name"); - return false; + return; } if (!m_interpreter.HasUserCommands() && !m_interpreter.HasUserMultiwordCommands()) { result.AppendErrorWithFormat("can only delete user defined commands, " "but no user defined commands found"); - return false; + return; } CommandObjectSP cmd_sp = m_interpreter.GetCommandSPExact(root_cmd); if (!cmd_sp) { result.AppendErrorWithFormat("command '%s' not found.", command[0].c_str()); - return false; + return; } if (!cmd_sp->IsUserCommand()) { result.AppendErrorWithFormat("command '%s' is not a user command.", command[0].c_str()); - return false; + return; } if (cmd_sp->GetAsMultiwordCommand() && num_args == 1) { result.AppendErrorWithFormat("command '%s' is a multi-word command.\n " "Delete with \"command container delete\"", command[0].c_str()); - return false; + return; } if (command.GetArgumentCount() == 1) { m_interpreter.RemoveUser(root_cmd); result.SetStatus(eReturnStatusSuccessFinishResult); - return true; + return; } // We're deleting a command from a multiword command. 
Verify the command // path: @@ -1796,14 +1778,14 @@ class CommandObjectCommandsScriptDelete : public CommandObjectParsed { if (error.Fail()) { result.AppendErrorWithFormat("could not resolve command path: %s", error.AsCString()); - return false; + return; } if (!container) { // This means that command only had a leaf command, so the container is // the root. That should have been handled above. result.AppendErrorWithFormat("could not find a container for '%s'", command[0].c_str()); - return false; + return; } const char *leaf_cmd = command[num_args - 1].c_str(); llvm::Error llvm_error = container->RemoveUserSubcommand(leaf_cmd, @@ -1812,7 +1794,7 @@ class CommandObjectCommandsScriptDelete : public CommandObjectParsed { result.AppendErrorWithFormat("could not delete command '%s': %s", leaf_cmd, llvm::toString(std::move(llvm_error)).c_str()); - return false; + return; } Stream &out_stream = result.GetOutputStream(); @@ -1824,7 +1806,6 @@ class CommandObjectCommandsScriptDelete : public CommandObjectParsed { } out_stream << '\n'; result.SetStatus(eReturnStatusSuccessFinishResult); - return true; } }; @@ -1945,12 +1926,12 @@ class CommandObjectCommandsContainerAdd : public CommandObjectParsed { std::string m_long_help; bool m_overwrite = false; }; - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { size_t num_args = command.GetArgumentCount(); if (num_args == 0) { result.AppendError("no command was specified"); - return false; + return; } if (num_args == 1) { @@ -1965,10 +1946,10 @@ class CommandObjectCommandsContainerAdd : public CommandObjectParsed { if (add_error.Fail()) { result.AppendErrorWithFormat("error adding command: %s", add_error.AsCString()); - return false; + return; } result.SetStatus(eReturnStatusSuccessFinishNoResult); - return true; + return; } // We're adding this to a subcommand, first find the subcommand: @@ -1980,7 +1961,7 @@ class CommandObjectCommandsContainerAdd : public CommandObjectParsed { if (!add_to_me) { result.AppendErrorWithFormat("error adding command: %s", path_error.AsCString()); - return false; + return; } const char *cmd_name = command.GetArgumentAtIndex(num_args - 1); @@ -1992,11 +1973,10 @@ class CommandObjectCommandsContainerAdd : public CommandObjectParsed { if (llvm_error) { result.AppendErrorWithFormat("error adding subcommand: %s", llvm::toString(std::move(llvm_error)).c_str()); - return false; + return; } result.SetStatus(eReturnStatusSuccessFinishNoResult); - return true; } private: @@ -2039,12 +2019,12 @@ class CommandObjectCommandsContainerDelete : public CommandObjectParsed { } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { size_t num_args = command.GetArgumentCount(); if (num_args == 0) { result.AppendError("No command was specified."); - return false; + return; } if (num_args == 1) { @@ -2057,27 +2037,27 @@ class CommandObjectCommandsContainerDelete : public CommandObjectParsed { if (!cmd_sp) { result.AppendErrorWithFormat("container command %s doesn't exist.", cmd_name); - return false; + return; } if (!cmd_sp->IsUserCommand()) { result.AppendErrorWithFormat( "container command %s is not a user command", cmd_name); - return false; + return; } if (!cmd_sp->GetAsMultiwordCommand()) { result.AppendErrorWithFormat("command %s is not a container command", cmd_name); - return false; + return; } bool did_remove = 
GetCommandInterpreter().RemoveUserMultiword(cmd_name); if (!did_remove) { result.AppendErrorWithFormat("error removing command %s.", cmd_name); - return false; + return; } result.SetStatus(eReturnStatusSuccessFinishNoResult); - return true; + return; } // We're removing a subcommand, first find the subcommand's owner: @@ -2089,7 +2069,7 @@ class CommandObjectCommandsContainerDelete : public CommandObjectParsed { if (!container) { result.AppendErrorWithFormat("error removing container command: %s", path_error.AsCString()); - return false; + return; } const char *leaf = command.GetArgumentAtIndex(num_args - 1); llvm::Error llvm_error = @@ -2097,10 +2077,9 @@ class CommandObjectCommandsContainerDelete : public CommandObjectParsed { if (llvm_error) { result.AppendErrorWithFormat("error removing container command: %s", llvm::toString(std::move(llvm_error)).c_str()); - return false; + return; } result.SetStatus(eReturnStatusSuccessFinishNoResult); - return true; } }; diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index bdc17c9cffc779..695f3d7931cd0a 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -58,7 +58,7 @@ void CommandObjectDWIMPrint::HandleArgumentCompletion( GetCommandInterpreter(), lldb::eVariablePathCompletion, request, nullptr); } -bool CommandObjectDWIMPrint::DoExecute(StringRef command, +void CommandObjectDWIMPrint::DoExecute(StringRef command, CommandReturnObject &result) { m_option_group.NotifyOptionParsingStarting(&m_exe_ctx); OptionsWithRaw args{command}; @@ -67,13 +67,13 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, if (expr.empty()) { result.AppendErrorWithFormatv("'{0}' takes a variable or expression", m_cmd_name); - return false; + return; } if (args.HasArgs()) { if (!ParseOptionsAndNotify(args.GetArgs(), result, m_option_group, m_exe_ctx)) - return false; + return; } // If the user has not specified, default to disabling persistent results. 
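The bare returns on error paths, here and throughout the patch, lean on existing CommandReturnObject behavior: AppendError and its Format/Formatv variants already put the return object into a failed state, so no explicit SetStatus call is needed after them (that behavior predates this patch; it is not added here). The DWIMPrint hunks that follow end with tails shaped roughly like this sketch:

    // Both outcomes simply fall off the end of DoExecute; the
    // interpreter later reads the status back out of result.
    if (valobj_sp) {
      valobj_sp->Dump(result.GetOutputStream(), dump_options);
      result.SetStatus(eReturnStatusSuccessFinishResult);
    } else {
      // The AppendError* variants leave result marked as failed.
      result.AppendErrorWithFormatv(
          "unknown error evaluating expression `{0}`", expr);
    }
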
@@ -164,7 +164,7 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, valobj_sp->Dump(result.GetOutputStream(), dump_options); } result.SetStatus(eReturnStatusSuccessFinishResult); - return true; + return; } } @@ -216,14 +216,12 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, } result.SetStatus(eReturnStatusSuccessFinishResult); - return true; } else { if (valobj_sp) result.SetError(valobj_sp->GetError()); else result.AppendErrorWithFormatv( "unknown error evaluating expression `{0}`", expr); - return false; } } } diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.h b/lldb/source/Commands/CommandObjectDWIMPrint.h index 3fc6c01d472970..d868f8964c2ac5 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.h +++ b/lldb/source/Commands/CommandObjectDWIMPrint.h @@ -44,7 +44,7 @@ class CommandObjectDWIMPrint : public CommandObjectRaw { OptionElementVector &opt_element_vector) override; private: - bool DoExecute(llvm::StringRef command, CommandReturnObject &result) override; + void DoExecute(llvm::StringRef command, CommandReturnObject &result) override; OptionGroupOptions m_option_group; OptionGroupFormat m_format_options = lldb::eFormatDefault; diff --git a/lldb/source/Commands/CommandObjectDiagnostics.cpp b/lldb/source/Commands/CommandObjectDiagnostics.cpp index dfde50a236abc2..ac87f869f01272 100644 --- a/lldb/source/Commands/CommandObjectDiagnostics.cpp +++ b/lldb/source/Commands/CommandObjectDiagnostics.cpp @@ -77,12 +77,12 @@ class CommandObjectDiagnosticsDump : public CommandObjectParsed { return Diagnostics::CreateUniqueDirectory(); } - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { llvm::Expected directory = GetDirectory(); if (!directory) { result.AppendError(llvm::toString(directory.takeError())); - return result.Succeeded(); + return; } llvm::Error error = Diagnostics::Instance().Create(*directory); @@ -90,13 +90,13 @@ class CommandObjectDiagnosticsDump : public CommandObjectParsed { result.AppendErrorWithFormat("failed to write diagnostics to %s", directory->GetPath().c_str()); result.AppendError(llvm::toString(std::move(error))); - return result.Succeeded(); + return; } result.GetOutputStream() << "diagnostics written to " << *directory << '\n'; result.SetStatus(eReturnStatusSuccessFinishResult); - return result.Succeeded(); + return; } CommandOptions m_options; diff --git a/lldb/source/Commands/CommandObjectDisassemble.cpp b/lldb/source/Commands/CommandObjectDisassemble.cpp index 6f78fc9f62876f..d975e39801317e 100644 --- a/lldb/source/Commands/CommandObjectDisassemble.cpp +++ b/lldb/source/Commands/CommandObjectDisassemble.cpp @@ -437,7 +437,7 @@ CommandObjectDisassemble::GetRangesForSelectedMode( return CommandObjectDisassemble::GetPCRanges(); } -bool CommandObjectDisassemble::DoExecute(Args &command, +void CommandObjectDisassemble::DoExecute(Args &command, CommandReturnObject &result) { Target *target = &GetSelectedTarget(); @@ -447,7 +447,7 @@ bool CommandObjectDisassemble::DoExecute(Args &command, if (!m_options.arch.IsValid()) { result.AppendError( "use the --arch option or set the target architecture to disassemble"); - return false; + return; } const char *plugin_name = m_options.GetPluginName(); @@ -466,7 +466,7 @@ bool CommandObjectDisassemble::DoExecute(Args &command, result.AppendErrorWithFormat( "Unable to find Disassembler plug-in for the '%s' architecture.\n", m_options.arch.GetArchitectureName()); - return false; + return; } else if (flavor_string != 
nullptr && !disassembler->FlavorValidForArchSpec( m_options.arch, flavor_string)) result.AppendWarningWithFormat( @@ -481,7 +481,7 @@ bool CommandObjectDisassemble::DoExecute(Args &command, GetCommandInterpreter().GetDebugger().GetTerminalWidth(); GetOptions()->GenerateOptionUsage(result.GetErrorStream(), *this, terminal_width); - return false; + return; } if (m_options.show_mixed && m_options.num_lines_context == 0) @@ -508,7 +508,7 @@ bool CommandObjectDisassemble::DoExecute(Args &command, GetRangesForSelectedMode(result); if (!ranges) { result.AppendError(toString(ranges.takeError())); - return result.Succeeded(); + return; } bool print_sc_header = ranges->size() > 1; @@ -541,6 +541,4 @@ bool CommandObjectDisassemble::DoExecute(Args &command, if (print_sc_header) result.GetOutputStream() << "\n"; } - - return result.Succeeded(); } diff --git a/lldb/source/Commands/CommandObjectDisassemble.h b/lldb/source/Commands/CommandObjectDisassemble.h index b5146863628d2e..2e4d46dd0ec586 100644 --- a/lldb/source/Commands/CommandObjectDisassemble.h +++ b/lldb/source/Commands/CommandObjectDisassemble.h @@ -73,7 +73,7 @@ class CommandObjectDisassemble : public CommandObjectParsed { Options *GetOptions() override { return &m_options; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override; + void DoExecute(Args &command, CommandReturnObject &result) override; llvm::Expected> GetRangesForSelectedMode(CommandReturnObject &result); diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp index 2834be660abaf5..3a2dc11e1e71cc 100644 --- a/lldb/source/Commands/CommandObjectExpression.cpp +++ b/lldb/source/Commands/CommandObjectExpression.cpp @@ -594,7 +594,7 @@ GetExprOptions(ExecutionContext &ctx, return expr_options; } -bool CommandObjectExpression::DoExecute(llvm::StringRef command, +void CommandObjectExpression::DoExecute(llvm::StringRef command, CommandReturnObject &result) { m_fixed_expression.clear(); auto exe_ctx = GetCommandInterpreter().GetExecutionContext(); @@ -602,7 +602,7 @@ bool CommandObjectExpression::DoExecute(llvm::StringRef command, if (command.empty()) { GetMultilineExpression(); - return result.Succeeded(); + return; } OptionsWithRaw args(command); @@ -610,7 +610,7 @@ bool CommandObjectExpression::DoExecute(llvm::StringRef command, if (args.HasArgs()) { if (!ParseOptionsAndNotify(args.GetArgs(), result, m_option_group, exe_ctx)) - return false; + return; if (m_repl_option.GetOptionValue().GetCurrentValue()) { Target &target = GetSelectedOrDummyTarget(); @@ -642,7 +642,7 @@ bool CommandObjectExpression::DoExecute(llvm::StringRef command, nullptr, true); if (!repl_error.Success()) { result.SetError(repl_error); - return result.Succeeded(); + return; } } @@ -662,14 +662,14 @@ bool CommandObjectExpression::DoExecute(llvm::StringRef command, "Couldn't create a REPL for %s", Language::GetNameForLanguageType(m_command_options.language)); result.SetError(repl_error); - return result.Succeeded(); + return; } } } // No expression following options else if (expr.empty()) { GetMultilineExpression(); - return result.Succeeded(); + return; } } @@ -691,8 +691,7 @@ bool CommandObjectExpression::DoExecute(llvm::StringRef command, fixed_command.append(m_fixed_expression); history.AppendString(fixed_command); } - return true; + return; } result.SetStatus(eReturnStatusFailed); - return false; } diff --git a/lldb/source/Commands/CommandObjectExpression.h b/lldb/source/Commands/CommandObjectExpression.h index 
b2b8fc73a1ee83..6fccf10e5dbc1d 100644 --- a/lldb/source/Commands/CommandObjectExpression.h +++ b/lldb/source/Commands/CommandObjectExpression.h @@ -75,7 +75,7 @@ class CommandObjectExpression : public CommandObjectRaw, bool IOHandlerIsInputComplete(IOHandler &io_handler, StringList &lines) override; - bool DoExecute(llvm::StringRef command, CommandReturnObject &result) override; + void DoExecute(llvm::StringRef command, CommandReturnObject &result) override; /// Evaluates the given expression. /// \param output_stream The stream to which the evaluation result will be diff --git a/lldb/source/Commands/CommandObjectFrame.cpp b/lldb/source/Commands/CommandObjectFrame.cpp index 1390fd8748dfaf..1fad638f214536 100644 --- a/lldb/source/Commands/CommandObjectFrame.cpp +++ b/lldb/source/Commands/CommandObjectFrame.cpp @@ -133,7 +133,7 @@ class CommandObjectFrameDiagnose : public CommandObjectParsed { Options *GetOptions() override { return &m_options; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Thread *thread = m_exe_ctx.GetThreadPtr(); StackFrameSP frame_sp = thread->GetSelectedFrame(SelectMostRelevantFrame); @@ -143,7 +143,7 @@ class CommandObjectFrameDiagnose : public CommandObjectParsed { if (m_options.reg || m_options.offset) { result.AppendError( "`frame diagnose --address` is incompatible with other arguments."); - return false; + return; } valobj_sp = frame_sp->GuessValueForAddress(*m_options.address); } else if (m_options.reg) { @@ -153,7 +153,7 @@ class CommandObjectFrameDiagnose : public CommandObjectParsed { StopInfoSP stop_info_sp = thread->GetStopInfo(); if (!stop_info_sp) { result.AppendError("No arguments provided, and no stop info."); - return false; + return; } valobj_sp = StopInfo::GetCrashingDereference(stop_info_sp); @@ -161,7 +161,7 @@ class CommandObjectFrameDiagnose : public CommandObjectParsed { if (!valobj_sp) { result.AppendError("No diagnosis available."); - return false; + return; } DumpValueObjectOptions::DeclPrintingHelper helper = @@ -180,8 +180,6 @@ class CommandObjectFrameDiagnose : public CommandObjectParsed { ValueObjectPrinter printer(valobj_sp.get(), &result.GetOutputStream(), options); printer.PrintValueObject(); - - return true; } CommandOptions m_options; @@ -205,10 +203,9 @@ class CommandObjectFrameInfo : public CommandObjectParsed { ~CommandObjectFrameInfo() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { m_exe_ctx.GetFrameRef().DumpUsingSettingsFormat(&result.GetOutputStream()); result.SetStatus(eReturnStatusSuccessFinishResult); - return result.Succeeded(); } }; @@ -299,7 +296,7 @@ class CommandObjectFrameSelect : public CommandObjectParsed { Options *GetOptions() override { return &m_options; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { // No need to check "thread" for validity as eCommandRequiresThread ensures // it is valid Thread *thread = m_exe_ctx.GetThreadPtr(); @@ -320,7 +317,7 @@ class CommandObjectFrameSelect : public CommandObjectParsed { // If you are already at the bottom of the stack, then just warn // and don't reset the frame. 
result.AppendError("Already at the bottom of the stack."); - return false; + return; } else frame_idx = 0; } @@ -345,7 +342,7 @@ class CommandObjectFrameSelect : public CommandObjectParsed { // If we are already at the top of the stack, just warn and don't // reset the frame. result.AppendError("Already at the top of the stack."); - return false; + return; } else frame_idx = num_frames - 1; } @@ -359,14 +356,14 @@ class CommandObjectFrameSelect : public CommandObjectParsed { m_options.GenerateOptionUsage( result.GetErrorStream(), *this, GetCommandInterpreter().GetDebugger().GetTerminalWidth()); - return false; + return; } if (command.GetArgumentCount() == 1) { if (command[0].ref().getAsInteger(0, frame_idx)) { result.AppendErrorWithFormat("invalid frame index argument '%s'.", command[0].c_str()); - return false; + return; } } else if (command.GetArgumentCount() == 0) { frame_idx = thread->GetSelectedFrameIndex(SelectMostRelevantFrame); @@ -385,8 +382,6 @@ class CommandObjectFrameSelect : public CommandObjectParsed { result.AppendErrorWithFormat("Frame index (%u) out of range.\n", frame_idx); } - - return result.Succeeded(); } CommandOptions m_options; @@ -524,7 +519,7 @@ may even involve JITing and running code in the target program.)"); return std::nullopt; } - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { // No need to check "frame" for validity as eCommandRequiresFrame ensures // it is valid StackFrame *frame = m_exe_ctx.GetFramePtr(); @@ -733,13 +728,11 @@ may even involve JITing and running code in the target program.)"); m_cmd_name); // Increment statistics. - bool res = result.Succeeded(); TargetStats &target_stats = GetSelectedOrDummyTarget().GetStatistics(); - if (res) + if (result.Succeeded()) target_stats.GetFrameVariableStats().NotifySuccess(); else target_stats.GetFrameVariableStats().NotifyFailure(); - return res; } OptionGroupOptions m_option_group; @@ -821,7 +814,7 @@ class CommandObjectFrameRecognizerAdd : public CommandObjectParsed { Options *GetOptions() override { return &m_options; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override; + void DoExecute(Args &command, CommandReturnObject &result) override; public: CommandObjectFrameRecognizerAdd(CommandInterpreter &interpreter) @@ -877,33 +870,33 @@ Process 1234 stopped ~CommandObjectFrameRecognizerAdd() override = default; }; -bool CommandObjectFrameRecognizerAdd::DoExecute(Args &command, +void CommandObjectFrameRecognizerAdd::DoExecute(Args &command, CommandReturnObject &result) { #if LLDB_ENABLE_PYTHON if (m_options.m_class_name.empty()) { result.AppendErrorWithFormat( "%s needs a Python class name (-l argument).\n", m_cmd_name.c_str()); - return false; + return; } if (m_options.m_module.empty()) { result.AppendErrorWithFormat("%s needs a module name (-s argument).\n", m_cmd_name.c_str()); - return false; + return; } if (m_options.m_symbols.empty()) { result.AppendErrorWithFormat( "%s needs at least one symbol name (-n argument).\n", m_cmd_name.c_str()); - return false; + return; } if (m_options.m_regex && m_options.m_symbols.size() > 1) { result.AppendErrorWithFormat( "%s needs only one symbol regular expression (-n argument).\n", m_cmd_name.c_str()); - return false; + return; } ScriptInterpreter *interpreter = GetDebugger().GetScriptInterpreter(); @@ -934,7 +927,6 @@ bool CommandObjectFrameRecognizerAdd::DoExecute(Args &command, #endif result.SetStatus(eReturnStatusSuccessFinishNoResult); - 
    return result.Succeeded();
  }
 
 class CommandObjectFrameRecognizerClear : public CommandObjectParsed {
@@ -946,12 +938,11 @@ class CommandObjectFrameRecognizerClear : public CommandObjectParsed {
   ~CommandObjectFrameRecognizerClear() override = default;
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     GetSelectedOrDummyTarget()
         .GetFrameRecognizerManager()
         .RemoveAllRecognizers();
     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return result.Succeeded();
   }
 };
 
@@ -995,33 +986,33 @@ class CommandObjectFrameRecognizerDelete : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     if (command.GetArgumentCount() == 0) {
       if (!m_interpreter.Confirm(
               "About to delete all frame recognizers, do you want to do that?",
               true)) {
         result.AppendMessage("Operation cancelled...");
-        return false;
+        return;
       }
 
       GetSelectedOrDummyTarget()
           .GetFrameRecognizerManager()
           .RemoveAllRecognizers();
       result.SetStatus(eReturnStatusSuccessFinishResult);
-      return result.Succeeded();
+      return;
     }
 
     if (command.GetArgumentCount() != 1) {
       result.AppendErrorWithFormat("'%s' takes zero or one arguments.\n",
                                    m_cmd_name.c_str());
-      return false;
+      return;
     }
 
     uint32_t recognizer_id;
     if (!llvm::to_integer(command.GetArgumentAtIndex(0), recognizer_id)) {
       result.AppendErrorWithFormat("'%s' is not a valid recognizer id.\n",
                                    command.GetArgumentAtIndex(0));
-      return false;
+      return;
     }
 
     if (!GetSelectedOrDummyTarget()
@@ -1029,10 +1020,9 @@ class CommandObjectFrameRecognizerDelete : public CommandObjectParsed {
             .RemoveRecognizerWithID(recognizer_id)) {
       result.AppendErrorWithFormat("'%s' is not a valid recognizer id.\n",
                                    command.GetArgumentAtIndex(0));
-      return false;
+      return;
     }
     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return result.Succeeded();
   }
 };
 
@@ -1046,7 +1036,7 @@ class CommandObjectFrameRecognizerList : public CommandObjectParsed {
   ~CommandObjectFrameRecognizerList() override = default;
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     bool any_printed = false;
     GetSelectedOrDummyTarget().GetFrameRecognizerManager().ForEach(
         [&result, &any_printed](
@@ -1078,7 +1068,6 @@ class CommandObjectFrameRecognizerList : public CommandObjectParsed {
       result.GetOutputStream().PutCString("no matching results found.\n");
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
     }
-    return result.Succeeded();
   }
 };
 
@@ -1107,35 +1096,35 @@ class CommandObjectFrameRecognizerInfo : public CommandObjectParsed {
   ~CommandObjectFrameRecognizerInfo() override = default;
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     const char *frame_index_str = command.GetArgumentAtIndex(0);
     uint32_t frame_index;
     if (!llvm::to_integer(frame_index_str, frame_index)) {
       result.AppendErrorWithFormat("'%s' is not a valid frame index.",
                                    frame_index_str);
-      return false;
+      return;
     }
 
     Process *process = m_exe_ctx.GetProcessPtr();
     if (process == nullptr) {
       result.AppendError("no process");
-      return false;
+      return;
     }
 
     Thread *thread = m_exe_ctx.GetThreadPtr();
     if (thread == nullptr) {
       result.AppendError("no thread");
-      return false;
+      return;
     }
 
     if (command.GetArgumentCount() != 1) {
       result.AppendErrorWithFormat(
          "'%s' takes exactly one frame index argument.\n", m_cmd_name.c_str());
-      return false;
+      return;
     }
 
     StackFrameSP frame_sp = thread->GetStackFrameAtIndex(frame_index);
     if (!frame_sp) {
       result.AppendErrorWithFormat("no frame with index %u", frame_index);
-      return false;
+      return;
     }
 
     auto recognizer = GetSelectedOrDummyTarget()
@@ -1152,7 +1141,6 @@ class CommandObjectFrameRecognizerInfo : public CommandObjectParsed {
     }
     output_stream.EOL();
     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return result.Succeeded();
   }
 };
 
diff --git a/lldb/source/Commands/CommandObjectGUI.cpp b/lldb/source/Commands/CommandObjectGUI.cpp
index a63d1718610c17..b56e49b073b03e 100644
--- a/lldb/source/Commands/CommandObjectGUI.cpp
+++ b/lldb/source/Commands/CommandObjectGUI.cpp
@@ -24,7 +24,7 @@ CommandObjectGUI::CommandObjectGUI(CommandInterpreter &interpreter)
 
 CommandObjectGUI::~CommandObjectGUI() = default;
 
-bool CommandObjectGUI::DoExecute(Args &args, CommandReturnObject &result) {
+void CommandObjectGUI::DoExecute(Args &args, CommandReturnObject &result) {
 #if LLDB_ENABLE_CURSES
   Debugger &debugger = GetDebugger();
 
@@ -39,9 +39,7 @@ bool CommandObjectGUI::DoExecute(Args &args, CommandReturnObject &result) {
   } else {
     result.AppendError("the gui command requires an interactive terminal.");
   }
-  return true;
 #else
   result.AppendError("lldb was not built with gui support");
-  return false;
 #endif
 }
diff --git a/lldb/source/Commands/CommandObjectGUI.h b/lldb/source/Commands/CommandObjectGUI.h
index 49bad49a957d7e..fde4342724c9dd 100644
--- a/lldb/source/Commands/CommandObjectGUI.h
+++ b/lldb/source/Commands/CommandObjectGUI.h
@@ -22,7 +22,7 @@ class CommandObjectGUI : public CommandObjectParsed {
   ~CommandObjectGUI() override;
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override;
+  void DoExecute(Args &args, CommandReturnObject &result) override;
 };
 
 } // namespace lldb_private
diff --git a/lldb/source/Commands/CommandObjectHelp.cpp b/lldb/source/Commands/CommandObjectHelp.cpp
index 10aa49ae01ba09..ddb006e52d2c54 100644
--- a/lldb/source/Commands/CommandObjectHelp.cpp
+++ b/lldb/source/Commands/CommandObjectHelp.cpp
@@ -74,7 +74,7 @@ CommandObjectHelp::CommandOptions::GetDefinitions() {
   return llvm::ArrayRef(g_help_options);
 }
 
-bool CommandObjectHelp::DoExecute(Args &command, CommandReturnObject &result) {
+void CommandObjectHelp::DoExecute(Args &command, CommandReturnObject &result) {
   CommandObject::CommandMap::iterator pos;
   CommandObject *cmd_obj;
   const size_t argc = command.GetArgumentCount();
@@ -142,14 +142,14 @@ bool CommandObjectHelp::DoExecute(Args &command, CommandReturnObject &result) {
         }
         s.Printf("\n");
         result.AppendError(s.GetString());
-        return false;
+        return;
       } else if (!sub_cmd_obj) {
         StreamString error_msg_stream;
         GenerateAdditionalHelpAvenuesMessage(
             &error_msg_stream, cmd_string.c_str(),
             m_interpreter.GetCommandPrefix(), sub_command.c_str());
         result.AppendError(error_msg_stream.GetString());
-        return false;
+        return;
       } else {
         GenerateAdditionalHelpAvenuesMessage(
             &result.GetOutputStream(), cmd_string.c_str(),
@@ -197,8 +197,6 @@ bool CommandObjectHelp::DoExecute(Args &command, CommandReturnObject &result) {
       }
     }
   }
-
-  return result.Succeeded();
 }
 
 void CommandObjectHelp::HandleCompletion(CompletionRequest &request) {
diff --git a/lldb/source/Commands/CommandObjectHelp.h b/lldb/source/Commands/CommandObjectHelp.h
index a0ed157b9caf4e..9b2c89e6654fad 100644
--- a/lldb/source/Commands/CommandObjectHelp.h
+++ b/lldb/source/Commands/CommandObjectHelp.h
@@ -76,7 +76,7 @@ class CommandObjectHelp : public CommandObjectParsed {
   Options *GetOptions() override { return &m_options; }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override;
+  void DoExecute(Args &command, CommandReturnObject &result) override;
 
 private:
   CommandOptions m_options;
diff --git a/lldb/source/Commands/CommandObjectLanguage.h b/lldb/source/Commands/CommandObjectLanguage.h
index 7a280902a07ef5..2f9f8fecc80da3 100644
--- a/lldb/source/Commands/CommandObjectLanguage.h
+++ b/lldb/source/Commands/CommandObjectLanguage.h
@@ -19,7 +19,7 @@ class CommandObjectLanguage : public CommandObjectMultiword {
   ~CommandObjectLanguage() override;
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result);
+  void DoExecute(Args &command, CommandReturnObject &result);
 };
 } // namespace lldb_private
diff --git a/lldb/source/Commands/CommandObjectLog.cpp b/lldb/source/Commands/CommandObjectLog.cpp
index 5dd6f898983724..6bfbf98078e6e8 100644
--- a/lldb/source/Commands/CommandObjectLog.cpp
+++ b/lldb/source/Commands/CommandObjectLog.cpp
@@ -162,19 +162,19 @@ class CommandObjectLogEnable : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     if (args.GetArgumentCount() < 2) {
       result.AppendErrorWithFormat(
           "%s takes a log channel and one or more log types.\n",
           m_cmd_name.c_str());
-      return false;
+      return;
     }
 
     if (m_options.handler == eLogHandlerCircular &&
         m_options.buffer_size.GetCurrentValue() == 0) {
       result.AppendError(
           "the circular buffer handler requires a non-zero buffer size.\n");
-      return false;
+      return;
     }
 
     if ((m_options.handler != eLogHandlerCircular &&
         m_options.handler != eLogHandlerStream) &&
        m_options.buffer_size.GetCurrentValue() != 0) {
       result.AppendError("a buffer size can only be specified for the circular "
                          "and stream buffer handler.\n");
-      return false;
+      return;
     }
 
     if (m_options.handler != eLogHandlerStream && m_options.log_file) {
       result.AppendError(
           "a file name can only be specified for the stream handler.\n");
-      return false;
+      return;
     }
 
     // Store into a std::string since we're about to shift the channel off.
@@ -212,7 +212,6 @@ class CommandObjectLogEnable : public CommandObjectParsed {
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
     else
       result.SetStatus(eReturnStatusFailed);
-    return result.Succeeded();
   }
 
   CommandOptions m_options;
@@ -257,12 +256,12 @@ class CommandObjectLogDisable : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     if (args.empty()) {
       result.AppendErrorWithFormat(
           "%s takes a log channel and one or more log types.\n",
           m_cmd_name.c_str());
-      return false;
+      return;
     }
 
     const std::string channel = std::string(args[0].ref());
@@ -278,7 +277,6 @@ class CommandObjectLogDisable : public CommandObjectParsed {
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
       result.GetErrorStream() << error_stream.str();
     }
-    return result.Succeeded();
   }
 };
 
@@ -315,7 +313,7 @@ class CommandObjectLogList : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     std::string output;
     llvm::raw_string_ostream output_stream(output);
     if (args.empty()) {
@@ -330,7 +328,6 @@ class CommandObjectLogList : public CommandObjectParsed {
       result.SetStatus(eReturnStatusSuccessFinishResult);
     }
     result.GetOutputStream() << output_stream.str();
-    return result.Succeeded();
   }
 };
 class CommandObjectLogDump : public CommandObjectParsed {
@@ -398,12 +395,12 @@ class CommandObjectLogDump : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     if (args.empty()) {
       result.AppendErrorWithFormat(
           "%s takes a log channel and one or more log types.\n",
           m_cmd_name.c_str());
-      return false;
+      return;
     }
 
     std::unique_ptr<llvm::raw_fd_ostream> stream_up;
@@ -417,7 +414,7 @@ class CommandObjectLogDump : public CommandObjectParsed {
         result.AppendErrorWithFormat("Unable to open log file '%s': %s",
                                      m_options.log_file.GetPath().c_str(),
                                      llvm::toString(file.takeError()).c_str());
-        return false;
+        return;
       }
       stream_up = std::make_unique<llvm::raw_fd_ostream>(
           (*file)->GetDescriptor(), /*shouldClose=*/true);
@@ -435,8 +432,6 @@ class CommandObjectLogDump : public CommandObjectParsed {
       result.SetStatus(eReturnStatusFailed);
       result.GetErrorStream() << error_stream.str();
     }
-
-    return result.Succeeded();
   }
 
   CommandOptions m_options;
@@ -467,7 +462,7 @@ class CommandObjectLogTimerEnable : public CommandObjectParsed {
   ~CommandObjectLogTimerEnable() override = default;
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     result.SetStatus(eReturnStatusFailed);
 
     if (args.GetArgumentCount() == 0) {
@@ -488,7 +483,6 @@ class CommandObjectLogTimerEnable : public CommandObjectParsed {
       result.AppendError("Missing subcommand");
       result.AppendErrorWithFormat("Usage: %s\n", m_cmd_syntax.c_str());
     }
-    return result.Succeeded();
   }
 };
 
@@ -503,7 +497,7 @@ class CommandObjectLogTimerDisable : public CommandObjectParsed {
   ~CommandObjectLogTimerDisable() override = default;
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     Timer::DumpCategoryTimes(result.GetOutputStream());
     Timer::SetDisplayDepth(0);
     result.SetStatus(eReturnStatusSuccessFinishResult);
 
@@ -512,7 +506,6 @@ class CommandObjectLogTimerDisable : public CommandObjectParsed {
       result.AppendError("Missing subcommand");
       result.AppendErrorWithFormat("Usage: %s\n", m_cmd_syntax.c_str());
     }
-    return result.Succeeded();
   }
 };
 
@@ -526,7 +519,7 @@ class CommandObjectLogTimerDump : public CommandObjectParsed {
   ~CommandObjectLogTimerDump() override = default;
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     Timer::DumpCategoryTimes(result.GetOutputStream());
     result.SetStatus(eReturnStatusSuccessFinishResult);
 
@@ -534,7 +527,6 @@ class CommandObjectLogTimerDump : public CommandObjectParsed {
       result.AppendError("Missing subcommand");
       result.AppendErrorWithFormat("Usage: %s\n", m_cmd_syntax.c_str());
     }
-    return result.Succeeded();
   }
 };
 
@@ -549,7 +541,7 @@ class CommandObjectLogTimerReset : public CommandObjectParsed {
   ~CommandObjectLogTimerReset() override = default;
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     Timer::ResetCategoryTimes();
     result.SetStatus(eReturnStatusSuccessFinishResult);
 
@@ -557,7 +549,6 @@ class CommandObjectLogTimerReset : public CommandObjectParsed {
       result.AppendError("Missing subcommand");
       result.AppendErrorWithFormat("Usage: %s\n", m_cmd_syntax.c_str());
     }
-    return result.Succeeded();
   }
 };
 
@@ -593,7 +584,7 @@ class CommandObjectLogTimerIncrement : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     result.SetStatus(eReturnStatusFailed);
 
     if (args.GetArgumentCount() == 1) {
@@ -612,7 +603,6 @@ class CommandObjectLogTimerIncrement : public CommandObjectParsed {
       result.AppendError("Missing subcommand");
       result.AppendErrorWithFormat("Usage: %s\n", m_cmd_syntax.c_str());
     }
-    return result.Succeeded();
   }
 };
 
diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp
index 97f2dde7b1eb2b..b02b7dee5619f8 100644
--- a/lldb/source/Commands/CommandObjectMemory.cpp
+++ b/lldb/source/Commands/CommandObjectMemory.cpp
@@ -348,7 +348,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     // No need to check "target" for validity as eCommandRequiresTarget ensures
     // it is valid
     Target *target = m_exe_ctx.GetTargetPtr();
@@ -361,7 +361,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
           m_cmd_name.c_str());
       result.AppendWarning("Expressions should be quoted if they contain "
                            "spaces or other special characters.");
-      return false;
+      return;
     }
 
     CompilerType compiler_type;
@@ -441,7 +441,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
         } else {
           result.AppendErrorWithFormat("invalid type string: '%s'\n",
                                        view_as_type_cstr);
-          return false;
+          return;
         }
         break;
 
@@ -490,7 +490,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
             "Multiple types found matching raw type '%s', please disambiguate "
            "by specifying the language with -x",
            lookup_type_name.GetCString());
-        return false;
+        return;
       }
 
       if (user_defined_types.size() == 1) {
@@ -504,7 +504,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
                                      "the raw type '%s' for full type '%s'\n",
                                      lookup_type_name.GetCString(),
                                      view_as_type_cstr);
-        return false;
+        return;
       } else {
         TypeSP type_sp(type_list.GetTypeAtIndex(0));
         compiler_type = type_sp->GetFullCompilerType();
@@ -517,7 +517,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
         compiler_type = pointer_type;
       else {
         result.AppendError("unable to make a pointer type\n");
-        return false;
+        return;
       }
       --pointer_count;
     }
@@ -527,7 +527,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
       result.AppendErrorWithFormat(
           "unable to get the byte size of the type '%s'\n", view_as_type_cstr);
-      return false;
+      return;
     }
     m_format_options.GetByteSizeValue() = *size;
 
@@ -540,7 +540,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
     // Look for invalid combinations of settings
     if (error.Fail()) {
       result.AppendError(error.AsCString());
-      return false;
+      return;
     }
 
     lldb::addr_t addr;
@@ -591,7 +591,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
     if (addr == LLDB_INVALID_ADDRESS) {
       result.AppendError("invalid start address expression.");
       result.AppendError(error.AsCString());
-      return false;
+      return;
     }
 
     if (argc == 2) {
@@ -601,19 +601,19 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
       if (end_addr == LLDB_INVALID_ADDRESS) {
         result.AppendError("invalid end address expression.");
         result.AppendError(error.AsCString());
-        return false;
+        return;
       } else if (end_addr <= addr) {
         result.AppendErrorWithFormat(
             "end address (0x%" PRIx64
             ") must be greater than the start address (0x%" PRIx64 ").\n",
             end_addr, addr);
-        return false;
+        return;
       } else if (m_format_options.GetCountValue().OptionWasSet()) {
         result.AppendErrorWithFormat(
             "specify either the end address (0x%" PRIx64
             ") or the count (--count %" PRIu64 "), not both.\n",
             end_addr, (uint64_t)item_count);
-        return false;
+        return;
       }
 
       total_byte_size = end_addr - addr;
@@ -631,7 +631,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
           "Please use --force to override this restriction just once.\n");
       result.AppendErrorWithFormat("or set target.max-memory-read-size if you "
                                    "will often need a larger limit.\n");
-      return false;
+      return;
     }
 
     WritableDataBufferSP data_sp;
@@ -645,7 +645,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
       std::optional<uint64_t> size = compiler_type.GetByteSize(nullptr);
       if (!size) {
         result.AppendError("can't get size of type");
-        return false;
+        return;
       }
       bytes_read = *size * m_format_options.GetCountValue().GetCurrentValue();
 
@@ -659,7 +659,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
             "can't allocate 0x%" PRIx32
             " bytes for the memory read buffer, specify a smaller size to read",
             (uint32_t)total_byte_size);
-        return false;
+        return;
       }
 
       Address address(addr, nullptr);
@@ -673,7 +673,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
           result.AppendErrorWithFormat(
               "failed to read memory from 0x%" PRIx64 ".\n", addr);
         }
-        return false;
+        return;
       }
 
       if (bytes_read < total_byte_size)
@@ -699,7 +699,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
             "can't allocate 0x%" PRIx64
             " bytes for the memory read buffer, specify a smaller size to read",
             (uint64_t)((item_byte_size + 1) * item_count));
-        return false;
+        return;
       }
       uint8_t *data_ptr = data_sp->GetBytes();
       auto data_addr = addr;
@@ -715,7 +715,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
         if (error.Fail()) {
           result.AppendErrorWithFormat(
               "failed to read memory from 0x%" PRIx64 ".\n", addr);
-          return false;
+          return;
         }
 
         if (item_byte_size == read) {
@@ -777,12 +777,12 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
           result.GetOutputStream().Printf(
              "%zi bytes %s to '%s'\n", bytes_written,
              append ? "appended" : "written", path.c_str());
-          return true;
+          return;
         } else {
           result.AppendErrorWithFormat("Failed to write %" PRIu64
                                        " bytes to '%s'.\n",
                                        (uint64_t)bytes_read, path.c_str());
-          return false;
+          return;
         }
       } else {
         // We are going to write ASCII to the file just point the
@@ -795,7 +795,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
                                        path.c_str(), append ? "append" : "write");
 
           result.AppendError(llvm::toString(outfile.takeError()));
-          return false;
+          return;
         }
       } else {
         output_stream_p = &result.GetOutputStream();
@@ -823,10 +823,10 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
           result.AppendErrorWithFormat(
               "failed to create a value object for: (%s) %s\n",
               view_as_type_cstr, name_strm.GetData());
-          return false;
+          return;
         }
       }
-      return true;
+      return;
     }
 
     result.SetStatus(eReturnStatusSuccessFinishResult);
@@ -852,7 +852,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
         result.AppendErrorWithFormat(
             "reading memory as characters of size %" PRIu64 " is not supported",
             (uint64_t)item_byte_size);
-        return false;
+        return;
       }
     }
 
@@ -863,7 +863,6 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
         exe_scope, m_memory_tag_options.GetShowTags().GetCurrentValue());
     m_next_addr = addr + bytes_dumped;
     output_stream_p->EOL();
-    return true;
   }
 
   OptionGroupOptions m_option_group;
@@ -1010,7 +1009,7 @@ class CommandObjectMemoryFind : public CommandObjectParsed {
     lldb::addr_t m_base_addr;
     bool m_is_valid = true;
   };
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     // No need to check "process" for validity as eCommandRequiresProcess
     // ensures it is valid
     Process *process = m_exe_ctx.GetProcessPtr();
@@ -1019,7 +1018,7 @@ class CommandObjectMemoryFind : public CommandObjectParsed {
 
     if (argc != 2) {
       result.AppendError("two addresses needed for memory find");
-      return false;
+      return;
     }
 
     Status error;
@@ -1027,19 +1026,19 @@ class CommandObjectMemoryFind : public CommandObjectParsed {
         &m_exe_ctx, command[0].ref(), LLDB_INVALID_ADDRESS, &error);
     if (low_addr == LLDB_INVALID_ADDRESS || error.Fail()) {
       result.AppendError("invalid low address");
-      return false;
+      return;
     }
     lldb::addr_t high_addr = OptionArgParser::ToAddress(
         &m_exe_ctx, command[1].ref(), LLDB_INVALID_ADDRESS, &error);
     if (high_addr == LLDB_INVALID_ADDRESS || error.Fail()) {
       result.AppendError("invalid high address");
-      return false;
+      return;
     }
 
     if (high_addr <= low_addr) {
       result.AppendError(
           "starting address must be smaller than ending address");
-      return false;
+      return;
     }
 
     lldb::addr_t found_location = LLDB_INVALID_ADDRESS;
@@ -1051,7 +1050,7 @@ class CommandObjectMemoryFind : public CommandObjectParsed {
           m_memory_options.m_string.GetValueAs<llvm::StringRef>().value_or("");
       if (str.empty()) {
         result.AppendError("search string must have non-zero length.");
-        return false;
+        return;
       }
       buffer.CopyData(str);
     } else if (m_memory_options.m_expr.OptionWasSet()) {
@@ -1067,7 +1066,7 @@ class CommandObjectMemoryFind : public CommandObjectParsed {
         std::optional<uint64_t> size =
             result_sp->GetCompilerType().GetByteSize(nullptr);
         if (!size)
-          return false;
+          return;
         switch (*size) {
         case 1: {
           uint8_t byte = (uint8_t)value;
@@ -1089,21 +1088,21 @@ class CommandObjectMemoryFind : public CommandObjectParsed {
         case 6:
         case 7:
           result.AppendError("unknown type. pass a string instead");
-          return false;
+          return;
         default:
           result.AppendError(
               "result size larger than 8 bytes. pass a string instead");
-          return false;
+          return;
         }
       } else {
         result.AppendError(
             "expression evaluation failed. pass a string instead");
-        return false;
+        return;
       }
     } else {
       result.AppendError(
           "please pass either a block of text, or an expression to evaluate.");
-      return false;
+      return;
     }
 
     size_t count = m_memory_options.m_count.GetCurrentValue();
@@ -1146,7 +1145,6 @@ class CommandObjectMemoryFind : public CommandObjectParsed {
     }
 
     result.SetStatus(lldb::eReturnStatusSuccessFinishResult);
-    return true;
   }
 
   lldb::addr_t FastSearch(lldb::addr_t low, lldb::addr_t high, uint8_t *buffer,
@@ -1291,7 +1289,7 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
   Options *GetOptions() override { return &m_option_group; }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     // No need to check "process" for validity as eCommandRequiresProcess
     // ensures it is valid
     Process *process = m_exe_ctx.GetProcessPtr();
@@ -1303,19 +1301,19 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
         result.AppendErrorWithFormat(
             "%s takes a destination address when writing file contents.\n",
             m_cmd_name.c_str());
-        return false;
+        return;
       }
       if (argc > 1) {
         result.AppendErrorWithFormat(
             "%s takes only a destination address when writing file contents.\n",
             m_cmd_name.c_str());
-        return false;
+        return;
       }
     } else if (argc < 2) {
       result.AppendErrorWithFormat(
           "%s takes a destination address and at least one value.\n",
           m_cmd_name.c_str());
-      return false;
+      return;
     }
 
     StreamString buffer(
@@ -1333,7 +1331,7 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
     if (addr == LLDB_INVALID_ADDRESS) {
       result.AppendError("invalid address expression\n");
       result.AppendError(error.AsCString());
-      return false;
+      return;
     }
 
     if (m_memory_options.m_infile) {
@@ -1372,7 +1370,7 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
       } else {
         result.AppendErrorWithFormat("Unable to read contents of file.\n");
       }
-      return result.Succeeded();
+      return;
     } else if (item_byte_size == 0) {
       if (m_format_options.GetFormat() == eFormatPointer)
         item_byte_size = buffer.GetAddressByteSize();
@@ -1415,7 +1413,7 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
       case eFormatInstruction:
       case eFormatVoid:
         result.AppendError("unsupported format for writing memory");
-        return false;
+        return;
 
       case eFormatDefault:
       case eFormatBytes:
@@ -1433,13 +1431,13 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
         if (!success) {
           result.AppendErrorWithFormat(
               "'%s' is not a valid hex string value.\n", entry.c_str());
-          return false;
+          return;
         } else if (!llvm::isUIntN(item_byte_size * 8, uval64)) {
           result.AppendErrorWithFormat("Value 0x%" PRIx64
                                        " is too large to fit in a %" PRIu64
                                        " byte unsigned integer value.\n",
                                        uval64, (uint64_t)item_byte_size);
-          return false;
+          return;
         }
         buffer.PutMaxHex64(uval64, item_byte_size);
         break;
@@ -1449,7 +1447,7 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
         if (!success) {
           result.AppendErrorWithFormat(
               "'%s' is not a valid boolean string value.\n", entry.c_str());
-          return false;
+          return;
         }
         buffer.PutMaxHex64(uval64, item_byte_size);
         break;
@@ -1458,13 +1456,13 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
         if (entry.ref().getAsInteger(2, uval64)) {
           result.AppendErrorWithFormat(
               "'%s' is not a valid binary string value.\n", entry.c_str());
-          return false;
+          return;
         } else if (!llvm::isUIntN(item_byte_size * 8, uval64)) {
           result.AppendErrorWithFormat("Value 0x%" PRIx64
                                        " is too large to fit in a %" PRIu64
                                        " byte unsigned integer value.\n",
                                        uval64, (uint64_t)item_byte_size);
-          return false;
+          return;
         }
         buffer.PutMaxHex64(uval64, item_byte_size);
         break;
@@ -1486,7 +1484,7 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
             result.AppendErrorWithFormat("Memory write to 0x%" PRIx64
                                          " failed: %s.\n",
                                          addr, error.AsCString());
-            return false;
+            return;
           }
           break;
         }
@@ -1494,13 +1492,13 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
         if (entry.ref().getAsInteger(0, sval64)) {
           result.AppendErrorWithFormat(
               "'%s' is not a valid signed decimal value.\n", entry.c_str());
-          return false;
+          return;
         } else if (!llvm::isIntN(item_byte_size * 8, sval64)) {
           result.AppendErrorWithFormat(
               "Value %" PRIi64 " is too large or small to fit in a %" PRIu64
               " byte signed integer value.\n",
               sval64, (uint64_t)item_byte_size);
-          return false;
+          return;
         }
         buffer.PutMaxHex64(sval64, item_byte_size);
         break;
@@ -1511,13 +1509,13 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
           result.AppendErrorWithFormat(
               "'%s' is not a valid unsigned decimal string value.\n",
               entry.c_str());
-          return false;
+          return;
         } else if (!llvm::isUIntN(item_byte_size * 8, uval64)) {
           result.AppendErrorWithFormat("Value %" PRIu64
                                        " is too large to fit in a %" PRIu64
                                        " byte unsigned integer value.\n",
                                        uval64, (uint64_t)item_byte_size);
-          return false;
+          return;
         }
         buffer.PutMaxHex64(uval64, item_byte_size);
         break;
@@ -1526,13 +1524,13 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
         if (entry.ref().getAsInteger(8, uval64)) {
           result.AppendErrorWithFormat(
               "'%s' is not a valid octal string value.\n", entry.c_str());
-          return false;
+          return;
        } else if (!llvm::isUIntN(item_byte_size * 8, uval64)) {
          result.AppendErrorWithFormat("Value %" PRIo64
                                       " is too large to fit in a %" PRIu64
                                       " byte unsigned integer value.\n",
                                       uval64, (uint64_t)item_byte_size);
-          return false;
+          return;
        }
        buffer.PutMaxHex64(uval64, item_byte_size);
        break;
      }
@@ -1541,18 +1539,18 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
 
     if (!buffer.GetString().empty()) {
       Status error;
-      if (process->WriteMemory(addr, buffer.GetString().data(),
-                               buffer.GetString().size(),
-                               error) == buffer.GetString().size())
-        return true;
-      else {
+      const char *buffer_data = buffer.GetString().data();
+      const size_t buffer_size = buffer.GetString().size();
+      const size_t write_size =
+          process->WriteMemory(addr, buffer_data, buffer_size, error);
+
+      if (write_size != buffer_size) {
         result.AppendErrorWithFormat("Memory write to 0x%" PRIx64
                                      " failed: %s.\n",
                                      addr, error.AsCString());
-        return false;
+        return;
       }
     }
-    return true;
   }
 
   OptionGroupOptions m_option_group;
@@ -1595,13 +1593,13 @@ class CommandObjectMemoryHistory : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     const size_t argc = command.GetArgumentCount();
 
     if (argc == 0 || argc > 1) {
       result.AppendErrorWithFormat("%s takes an address expression",
                                    m_cmd_name.c_str());
-      return false;
+      return;
     }
 
     Status error;
@@ -1611,7 +1609,7 @@ class CommandObjectMemoryHistory : public CommandObjectParsed {
     if (addr == LLDB_INVALID_ADDRESS) {
       result.AppendError("invalid address expression");
       result.AppendError(error.AsCString());
-      return false;
+      return;
     }
 
     Stream *output_stream = &result.GetOutputStream();
@@ -1622,7 +1620,7 @@ class CommandObjectMemoryHistory : public CommandObjectParsed {
 
     if (!memory_history) {
       result.AppendError("no available memory history provider");
-      return false;
+      return;
     }
 
     HistoryThreads thread_list = memory_history->GetHistoryThreads(addr);
@@ -1633,8 +1631,6 @@ class CommandObjectMemoryHistory : public CommandObjectParsed {
     }
 
     result.SetStatus(eReturnStatusSuccessFinishResult);
-
-    return true;
   }
 };
 
@@ -1747,12 +1743,12 @@ class CommandObjectMemoryRegion : public CommandObjectParsed {
       }
     }
 
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     ProcessSP process_sp = m_exe_ctx.GetProcessSP();
     if (!process_sp) {
       m_prev_end_addr = LLDB_INVALID_ADDRESS;
       result.AppendError("invalid process");
-      return false;
+      return;
     }
 
     Status error;
@@ -1767,7 +1763,7 @@ class CommandObjectMemoryRegion : public CommandObjectParsed {
         result.AppendError(
             "The \"--all\" option cannot be used when an address "
            "argument is given");
-        return false;
+        return;
      }
 
      auto load_addr_str = command[0].ref();
@@ -1776,7 +1772,7 @@ class CommandObjectMemoryRegion : public CommandObjectParsed {
       if (error.Fail() || load_addr == LLDB_INVALID_ADDRESS) {
         result.AppendErrorWithFormat("invalid address argument \"%s\": %s\n",
                                      command[0].c_str(), error.AsCString());
-        return false;
+        return;
       }
     } else if (argc > 1 ||
                // When we're repeating the command, the previous end address is
@@ -1792,7 +1788,7 @@ class CommandObjectMemoryRegion : public CommandObjectParsed {
       result.AppendErrorWithFormat(
           "'%s' takes one argument or \"--all\" option:\nUsage: %s\n",
           m_cmd_name.c_str(), m_cmd_syntax.c_str());
-      return false;
+      return;
     }
 
     // It is important that we track the address used to request the region as
@@ -1832,11 +1828,10 @@ class CommandObjectMemoryRegion : public CommandObjectParsed {
       }
 
       result.SetStatus(eReturnStatusSuccessFinishResult);
-      return true;
+      return;
     }
 
     result.AppendErrorWithFormat("%s\n", error.AsCString());
-    return false;
   }
 
   std::optional<std::string> GetRepeatCommand(Args &current_command_args,
diff --git a/lldb/source/Commands/CommandObjectMemoryTag.cpp b/lldb/source/Commands/CommandObjectMemoryTag.cpp
index b436a185cd1452..f45d6eacab3d0e 100644
--- a/lldb/source/Commands/CommandObjectMemoryTag.cpp
+++ b/lldb/source/Commands/CommandObjectMemoryTag.cpp
@@ -42,12 +42,12 @@ class CommandObjectMemoryTagRead : public CommandObjectParsed {
   ~CommandObjectMemoryTagRead() override = default;
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     if ((command.GetArgumentCount() < 1) || (command.GetArgumentCount() > 2)) {
       result.AppendError(
           "wrong number of arguments; expected at least <address expression>, "
          "at most <address expression> <end address expression>");
-      return false;
+      return;
     }
 
     Status error;
@@ -56,7 +56,7 @@ class CommandObjectMemoryTagRead : public CommandObjectParsed {
     if (start_addr == LLDB_INVALID_ADDRESS) {
       result.AppendErrorWithFormatv("Invalid address expression, {0}",
                                     error.AsCString());
-      return false;
+      return;
     }
 
     // Default 1 byte beyond start, rounds up to at most 1 granule later
@@ -68,7 +68,7 @@ class CommandObjectMemoryTagRead : public CommandObjectParsed {
       if (end_addr == LLDB_INVALID_ADDRESS) {
         result.AppendErrorWithFormatv("Invalid end address expression, {0}",
                                       error.AsCString());
-        return false;
+        return;
       }
     }
 
@@ -78,7 +78,7 @@ class CommandObjectMemoryTagRead : public CommandObjectParsed {
 
     if (!tag_manager_or_err) {
       result.SetError(Status(tag_manager_or_err.takeError()));
-      return false;
+      return;
     }
 
     const MemoryTagManager *tag_manager = *tag_manager_or_err;
@@ -103,7 +103,7 @@ class CommandObjectMemoryTagRead : public CommandObjectParsed {
 
     if (!tagged_range) {
       result.SetError(Status(tagged_range.takeError()));
-      return false;
+      return;
     }
 
     llvm::Expected<std::vector<lldb::addr_t>> tags = process->ReadMemoryTags(
@@ -111,7 +111,7 @@ class CommandObjectMemoryTagRead : public CommandObjectParsed {
 
     if (!tags) {
       result.SetError(Status(tags.takeError()));
-      return false;
+      return;
     }
 
     result.AppendMessageWithFormatv("Logical tag: {0:x}", logical_tag);
@@ -128,7 +128,6 @@ class CommandObjectMemoryTagRead : public CommandObjectParsed {
     }
 
     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return true;
   }
 };
 
@@ -195,11 +194,11 @@ class CommandObjectMemoryTagWrite : public CommandObjectParsed {
   Options *GetOptions() override { return &m_option_group; }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     if (command.GetArgumentCount() < 2) {
       result.AppendError("wrong number of arguments; expected "
                          "<address expression> <tag> [<tag> [...]]");
-      return false;
+      return;
     }
 
     Status error;
@@ -208,7 +207,7 @@ class CommandObjectMemoryTagWrite : public CommandObjectParsed {
     if (start_addr == LLDB_INVALID_ADDRESS) {
       result.AppendErrorWithFormatv("Invalid address expression, {0}",
                                     error.AsCString());
-      return false;
+      return;
     }
 
     command.Shift(); // shift off start address
@@ -221,7 +220,7 @@ class CommandObjectMemoryTagWrite : public CommandObjectParsed {
         result.AppendErrorWithFormat(
             "'%s' is not a valid unsigned decimal string value.\n",
             entry.c_str());
-        return false;
+        return;
       }
       tags.push_back(tag_value);
     }
@@ -232,7 +231,7 @@ class CommandObjectMemoryTagWrite : public CommandObjectParsed {
 
     if (!tag_manager_or_err) {
       result.SetError(Status(tag_manager_or_err.takeError()));
-      return false;
+      return;
     }
 
     const MemoryTagManager *tag_manager = *tag_manager_or_err;
@@ -284,7 +283,7 @@ class CommandObjectMemoryTagWrite : public CommandObjectParsed {
 
     if (!tagged_range) {
       result.SetError(Status(tagged_range.takeError()));
-      return false;
+      return;
     }
 
     Status status = process->WriteMemoryTags(tagged_range->GetRangeBase(),
@@ -292,11 +291,10 @@ class CommandObjectMemoryTagWrite : public CommandObjectParsed {
 
     if (status.Fail()) {
       result.SetError(status);
-      return false;
+      return;
     }
 
     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return true;
   }
 
   OptionGroupOptions m_option_group;
diff --git a/lldb/source/Commands/CommandObjectPlatform.cpp b/lldb/source/Commands/CommandObjectPlatform.cpp
index 54115b51be78e4..790f1dbb475358 100644
--- a/lldb/source/Commands/CommandObjectPlatform.cpp
+++ b/lldb/source/Commands/CommandObjectPlatform.cpp
@@ -169,7 +169,7 @@ class CommandObjectPlatformSelect : public CommandObjectParsed {
   Options *GetOptions() override { return &m_option_group; }
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     if (args.GetArgumentCount() == 1) {
       const char *platform_name = args.GetArgumentAtIndex(0);
       if (platform_name && platform_name[0]) {
@@ -194,7 +194,6 @@ class CommandObjectPlatformSelect : public CommandObjectParsed {
       result.AppendError(
           "platform create takes a platform name as an argument\n");
     }
-    return result.Succeeded();
   }
 
   OptionGroupOptions m_option_group;
@@ -212,7 +211,7 @@ class CommandObjectPlatformList : public CommandObjectParsed {
   ~CommandObjectPlatformList() override = default;
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     Stream &ostrm = result.GetOutputStream();
     ostrm.Printf("Available platforms:\n");
 
@@ -235,7 +234,6 @@ class CommandObjectPlatformList : public CommandObjectParsed {
       result.AppendError("no platforms are available\n");
     } else
       result.SetStatus(eReturnStatusSuccessFinishResult);
-    return result.Succeeded();
   }
 };
 
@@ -250,7 +248,7 @@ class CommandObjectPlatformStatus : public CommandObjectParsed {
   ~CommandObjectPlatformStatus() override = default;
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     Stream &ostrm = result.GetOutputStream();
 
     Target *target = GetDebugger().GetSelectedTarget().get();
@@ -267,7 +265,6 @@ class CommandObjectPlatformStatus : public CommandObjectParsed {
     } else {
       result.AppendError("no platform is currently selected\n");
     }
-    return result.Succeeded();
   }
 };
 
@@ -286,7 +283,7 @@ class CommandObjectPlatformConnect : public CommandObjectParsed {
   ~CommandObjectPlatformConnect() override = default;
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     Stream &ostrm = result.GetOutputStream();
 
     PlatformSP platform_sp(
@@ -307,7 +304,6 @@ class CommandObjectPlatformConnect : public CommandObjectParsed {
     } else {
       result.AppendError("no platform is currently selected\n");
     }
-    return result.Succeeded();
   }
 
   Options *GetOptions() override {
@@ -334,7 +330,7 @@ class CommandObjectPlatformDisconnect : public CommandObjectParsed {
   ~CommandObjectPlatformDisconnect() override = default;
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     PlatformSP platform_sp(
         GetDebugger().GetPlatformList().GetSelectedPlatform());
     if (platform_sp) {
@@ -374,7 +370,6 @@ class CommandObjectPlatformDisconnect : public CommandObjectParsed {
     } else {
       result.AppendError("no platform is currently selected");
     }
-    return result.Succeeded();
   }
 };
 
@@ -394,7 +389,7 @@ class CommandObjectPlatformSettings : public CommandObjectParsed {
   ~CommandObjectPlatformSettings() override = default;
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     PlatformSP platform_sp(
         GetDebugger().GetPlatformList().GetSelectedPlatform());
     if (platform_sp) {
@@ -404,7 +399,6 @@ class CommandObjectPlatformSettings : public CommandObjectParsed {
     } else {
       result.AppendError("no platform is currently selected");
     }
-    return result.Succeeded();
   }
 
   Options *GetOptions() override {
@@ -430,7 +424,7 @@ class CommandObjectPlatformMkDir : public CommandObjectParsed {
 
   ~CommandObjectPlatformMkDir() override = default;
 
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     PlatformSP platform_sp(
         GetDebugger().GetPlatformList().GetSelectedPlatform());
     if (platform_sp) {
@@ -453,7 +447,6 @@ class CommandObjectPlatformMkDir : public CommandObjectParsed {
     } else {
       result.AppendError("no platform currently selected\n");
     }
-    return result.Succeeded();
   }
 
   Options *GetOptions() override {
@@ -489,7 +482,7 @@ class CommandObjectPlatformFOpen : public CommandObjectParsed {
                                                 nullptr);
   }
 
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
    PlatformSP platform_sp(
        GetDebugger().GetPlatformList().GetSelectedPlatform());
    if (platform_sp) {
@@ -517,7 +510,6 @@ class CommandObjectPlatformFOpen : public CommandObjectParsed {
     } else {
       result.AppendError("no platform currently selected\n");
     }
-    return result.Succeeded();
   }
 
   Options *GetOptions() override {
@@ -544,7 +536,7 @@ class CommandObjectPlatformFClose : public CommandObjectParsed {
 
   ~CommandObjectPlatformFClose() override = default;
 
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     PlatformSP platform_sp(
         GetDebugger().GetPlatformList().GetSelectedPlatform());
     if (platform_sp) {
@@ -554,7 +546,7 @@ class CommandObjectPlatformFClose : public CommandObjectParsed {
       if (!llvm::to_integer(cmd_line, fd)) {
         result.AppendErrorWithFormatv("'{0}' is not a valid file descriptor.\n",
                                       cmd_line);
-        return result.Succeeded();
+        return;
       }
       Status error;
       bool success = platform_sp->CloseFile(fd, error);
@@ -567,7 +559,6 @@ class CommandObjectPlatformFClose : public CommandObjectParsed {
     } else {
       result.AppendError("no platform currently selected\n");
     }
-    return result.Succeeded();
   }
 };
 
@@ -588,7 +579,7 @@ class CommandObjectPlatformFRead : public CommandObjectParsed {
 
   ~CommandObjectPlatformFRead() override = default;
 
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     PlatformSP platform_sp(
         GetDebugger().GetPlatformList().GetSelectedPlatform());
     if (platform_sp) {
@@ -598,7 +589,7 @@ class CommandObjectPlatformFRead : public CommandObjectParsed {
       if (!llvm::to_integer(cmd_line, fd)) {
         result.AppendErrorWithFormatv("'{0}' is not a valid file descriptor.\n",
                                       cmd_line);
-        return result.Succeeded();
+        return;
       }
       std::string buffer(m_options.m_count, 0);
       Status error;
@@ -614,7 +605,6 @@ class CommandObjectPlatformFRead : public CommandObjectParsed {
     } else {
       result.AppendError("no platform currently selected\n");
     }
-    return result.Succeeded();
   }
 
   Options *GetOptions() override { return &m_options; }
@@ -684,7 +674,7 @@ class CommandObjectPlatformFWrite : public CommandObjectParsed {
 
   ~CommandObjectPlatformFWrite() override = default;
 
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     PlatformSP platform_sp(
         GetDebugger().GetPlatformList().GetSelectedPlatform());
     if (platform_sp) {
@@ -695,7 +685,7 @@ class CommandObjectPlatformFWrite : public CommandObjectParsed {
       if (!llvm::to_integer(cmd_line, fd)) {
         result.AppendErrorWithFormatv("'{0}' is not a valid file descriptor.",
                                       cmd_line);
-        return result.Succeeded();
+        return;
       }
       uint64_t retcode =
           platform_sp->WriteFile(fd, m_options.m_offset, &m_options.m_data[0],
@@ -709,7 +699,6 @@ class CommandObjectPlatformFWrite : public CommandObjectParsed {
     } else {
       result.AppendError("no platform currently selected\n");
     }
-    return result.Succeeded();
   }
 
   Options *GetOptions() override { return &m_options; }
@@ -839,12 +828,12 @@ class CommandObjectPlatformGetFile : public CommandObjectParsed {
         GetCommandInterpreter(), lldb::eDiskFileCompletion, request, nullptr);
   }
 
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     // If the number of arguments is incorrect, issue an error message.
     if (args.GetArgumentCount() != 2) {
       result.AppendError("required arguments missing; specify both the "
                          "source and destination file paths");
-      return false;
+      return;
     }
 
     PlatformSP platform_sp(
@@ -866,7 +855,6 @@ class CommandObjectPlatformGetFile : public CommandObjectParsed {
     } else {
       result.AppendError("no platform currently selected\n");
     }
-    return result.Succeeded();
   }
 };
 
@@ -911,12 +899,12 @@ class CommandObjectPlatformGetSize : public CommandObjectParsed {
                                                 nullptr);
   }
 
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     // If the number of arguments is incorrect, issue an error message.
     if (args.GetArgumentCount() != 1) {
       result.AppendError("required argument missing; specify the source file "
                          "path as the only argument");
-      return false;
+      return;
     }
 
     PlatformSP platform_sp(
@@ -937,7 +925,6 @@ class CommandObjectPlatformGetSize : public CommandObjectParsed {
     } else {
       result.AppendError("no platform currently selected\n");
     }
-    return result.Succeeded();
   }
 };
 
@@ -982,12 +969,12 @@ class CommandObjectPlatformGetPermissions : public CommandObjectParsed {
                                                 nullptr);
   }
 
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     // If the number of arguments is incorrect, issue an error message.
     if (args.GetArgumentCount() != 1) {
       result.AppendError("required argument missing; specify the source file "
                          "path as the only argument");
-      return false;
+      return;
     }
 
     PlatformSP platform_sp(
@@ -1007,7 +994,6 @@ class CommandObjectPlatformGetPermissions : public CommandObjectParsed {
     } else {
       result.AppendError("no platform currently selected\n");
     }
-    return result.Succeeded();
   }
 };
 
@@ -1052,12 +1038,12 @@ class CommandObjectPlatformFileExists : public CommandObjectParsed {
                                                 nullptr);
   }
 
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     // If the number of arguments is incorrect, issue an error message.
     if (args.GetArgumentCount() != 1) {
       result.AppendError("required argument missing; specify the source file "
                          "path as the only argument");
-      return false;
+      return;
     }
 
     PlatformSP platform_sp(
@@ -1072,7 +1058,6 @@ class CommandObjectPlatformFileExists : public CommandObjectParsed {
     } else {
       result.AppendError("no platform currently selected\n");
     }
-    return result.Succeeded();
   }
 };
 
@@ -1114,7 +1099,7 @@ class CommandObjectPlatformPutFile : public CommandObjectParsed {
                                                 nullptr);
   }
 
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     const char *src = args.GetArgumentAtIndex(0);
     const char *dst = args.GetArgumentAtIndex(1);
 
@@ -1134,7 +1119,6 @@ class CommandObjectPlatformPutFile : public CommandObjectParsed {
     } else {
       result.AppendError("no platform currently selected\n");
     }
-    return result.Succeeded();
   }
 };
 
@@ -1160,7 +1144,7 @@ class CommandObjectPlatformProcessLaunch : public CommandObjectParsed {
   Options *GetOptions() override { return &m_all_options; }
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     Target *target = GetDebugger().GetSelectedTarget().get();
     PlatformSP platform_sp;
     if (target) {
@@ -1220,10 +1204,10 @@ class CommandObjectPlatformProcessLaunch : public CommandObjectParsed {
 
         if (!process_sp && error.Success()) {
           result.AppendError("failed to launch or debug process");
-          return false;
+          return;
         } else if (!error.Success()) {
           result.AppendError(error.AsCString());
-          return false;
+          return;
         }
 
         const bool synchronous_execution =
@@ -1242,7 +1226,7 @@ class CommandObjectPlatformProcessLaunch : public CommandObjectParsed {
         if (rebroadcast_first_stop) {
           assert(first_stop_event_sp);
           process_sp->BroadcastEvent(first_stop_event_sp);
-          return true;
+          return;
         }
 
         switch (state) {
@@ -1272,18 +1256,17 @@ class CommandObjectPlatformProcessLaunch : public CommandObjectParsed {
 
         if (process_sp && process_sp->IsAlive()) {
           result.SetStatus(eReturnStatusSuccessFinishNoResult);
-          return true;
+          return;
         }
       } else {
         result.AppendError("'platform process launch' uses the current target "
                            "file and arguments, or the executable and its "
                            "arguments can be specified in this command");
-        return false;
+        return;
       }
     } else {
       result.AppendError("no platform is selected\n");
     }
-    return result.Succeeded();
   }
 
   CommandOptionsProcessLaunch m_options;
@@ -1310,7 +1293,7 @@ class CommandObjectPlatformProcessList : public CommandObjectParsed {
   Options *GetOptions() override { return &m_options; }
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     Target *target = GetDebugger().GetSelectedTarget().get();
     PlatformSP platform_sp;
     if (target) {
@@ -1398,7 +1381,6 @@ class CommandObjectPlatformProcessList : public CommandObjectParsed {
     } else {
       result.AppendError("no platform is selected\n");
     }
-    return result.Succeeded();
   }
 
   class CommandOptions : public Options {
@@ -1578,7 +1560,7 @@ class CommandObjectPlatformProcessInfo : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     Target *target = GetDebugger().GetSelectedTarget().get();
     PlatformSP platform_sp;
     if (target) {
@@ -1627,7 +1609,6 @@ class CommandObjectPlatformProcessInfo : public CommandObjectParsed {
     } else {
       result.AppendError("no platform is currently selected");
     }
-    return result.Succeeded();
   }
 };
 
@@ -1649,7 +1630,7 @@ class CommandObjectPlatformProcessAttach : public CommandObjectParsed {
 
   ~CommandObjectPlatformProcessAttach() override = default;
 
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     PlatformSP platform_sp(
         GetDebugger().GetPlatformList().GetSelectedPlatform());
     if (platform_sp) {
@@ -1673,7 +1654,6 @@ class CommandObjectPlatformProcessAttach : public CommandObjectParsed {
     } else {
       result.AppendError("no platform is currently selected");
     }
-    return result.Succeeded();
   }
 
   Options *GetOptions() override { return &m_all_options; }
@@ -1788,7 +1768,7 @@ class CommandObjectPlatformShell : public CommandObjectRaw {
 
   Options *GetOptions() override { return &m_options; }
 
-  bool DoExecute(llvm::StringRef raw_command_line,
+  void DoExecute(llvm::StringRef raw_command_line,
                  CommandReturnObject &result) override {
     ExecutionContext exe_ctx = GetCommandInterpreter().GetExecutionContext();
     m_options.NotifyOptionParsingStarting(&exe_ctx);
@@ -1796,7 +1776,7 @@ class CommandObjectPlatformShell : public CommandObjectRaw {
     // Print out a usage syntax on an empty command line.
     if (raw_command_line.empty()) {
       result.GetOutputStream().Printf("%s\n", this->GetSyntax().str().c_str());
-      return true;
+      return;
     }
 
     const bool is_alias = !raw_command_line.contains("platform");
@@ -1804,12 +1784,12 @@ class CommandObjectPlatformShell : public CommandObjectRaw {
 
     if (args.HasArgs())
       if (!ParseOptions(args.GetArgs(), result))
-        return false;
+        return;
 
     if (args.GetRawPart().empty()) {
       result.GetOutputStream().Printf("%s <shell-command>\n",
                                       is_alias ? "shell" : "platform shell");
-      return false;
+      return;
     }
 
     llvm::StringRef cmd = args.GetRawPart();
@@ -1856,7 +1836,6 @@ class CommandObjectPlatformShell : public CommandObjectRaw {
     } else {
       result.SetStatus(eReturnStatusSuccessFinishResult);
     }
-    return true;
   }
 
   CommandOptions m_options;
@@ -1887,10 +1866,10 @@ class CommandObjectPlatformInstall : public CommandObjectParsed {
         GetCommandInterpreter(), lldb::eDiskFileCompletion, request, nullptr);
   }
 
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     if (args.GetArgumentCount() != 2) {
       result.AppendError("platform target-install takes two arguments");
-      return false;
+      return;
     }
     // TODO: move the bulk of this code over to the platform itself
     FileSpec src(args.GetArgumentAtIndex(0));
@@ -1898,13 +1877,13 @@ class CommandObjectPlatformInstall : public CommandObjectParsed {
     FileSpec dst(args.GetArgumentAtIndex(1));
     if (!FileSystem::Instance().Exists(src)) {
       result.AppendError("source location does not exist or is not accessible");
-      return false;
+      return;
     }
     PlatformSP platform_sp(
         GetDebugger().GetPlatformList().GetSelectedPlatform());
     if (!platform_sp) {
       result.AppendError("no platform currently selected");
-      return false;
+      return;
     }
 
     Status error = platform_sp->Install(src, dst);
@@ -1913,7 +1892,6 @@ class CommandObjectPlatformInstall : public CommandObjectParsed {
     } else {
       result.AppendErrorWithFormat("install failed: %s", error.AsCString());
     }
-    return result.Succeeded();
   }
 };
 
diff --git a/lldb/source/Commands/CommandObjectPlugin.cpp b/lldb/source/Commands/CommandObjectPlugin.cpp
index 8661ebb5022b8f..f22885144b09b3 100644
--- a/lldb/source/Commands/CommandObjectPlugin.cpp
+++ b/lldb/source/Commands/CommandObjectPlugin.cpp
@@ -44,12 +44,12 @@ class CommandObjectPluginLoad : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     size_t argc = command.GetArgumentCount();
 
     if (argc != 1) {
       result.AppendError("'plugin load' requires one argument");
-      return false;
+      return;
     }
 
     Status error;
@@ -62,8 +62,6 @@ class CommandObjectPluginLoad : public CommandObjectParsed {
     else {
       result.AppendError(error.AsCString());
     }
-
-    return result.Succeeded();
   }
 };
 
diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp
index cbf2652dae1ef1..c7ce1b1258c196 100644
--- a/lldb/source/Commands/CommandObjectProcess.cpp
+++ b/lldb/source/Commands/CommandObjectProcess.cpp
@@ -160,7 +160,7 @@ class CommandObjectProcessLaunch : public CommandObjectProcessLaunchOrAttach {
   }
 
 protected:
-  bool DoExecute(Args &launch_args, CommandReturnObject &result) override {
+  void DoExecute(Args &launch_args, CommandReturnObject &result) override {
     Debugger &debugger = GetDebugger();
     Target *target = debugger.GetSelectedTarget().get();
     // If our listener is nullptr, users aren't allowed to launch
@@ -174,13 +174,13 @@ class CommandObjectProcessLaunch : public CommandObjectProcessLaunchOrAttach {
     if (exe_module_sp == nullptr &&
        !target->GetProcessLaunchInfo().GetExecutableFile()) {
       result.AppendError("no file in target, create a debug target using the "
                          "'target create' command");
-      return false;
+      return;
     }
 
     StateType state = eStateInvalid;
 
     if (!StopProcessIfNecessary(m_exe_ctx.GetProcessPtr(), state, result))
-      return false;
+      return;
 
     // Determine whether we will disable ASLR or leave it in the default state
     // (i.e. enabled if the platform supports it). First check if the process
@@ -290,7 +290,6 @@ class CommandObjectProcessLaunch : public CommandObjectProcessLaunchOrAttach {
     } else {
       result.AppendError(error.AsCString());
     }
-    return result.Succeeded();
   }
 
   CommandOptionsProcessLaunch m_options;
@@ -320,7 +319,7 @@ class CommandObjectProcessAttach : public CommandObjectProcessLaunchOrAttach {
   Options *GetOptions() override { return &m_all_options; }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     PlatformSP platform_sp(
         GetDebugger().GetPlatformList().GetSelectedPlatform());
 
@@ -334,7 +333,7 @@ class CommandObjectProcessAttach : public CommandObjectProcessLaunchOrAttach {
     Process *process = m_exe_ctx.GetProcessPtr();
 
     if (!StopProcessIfNecessary(process, state, result))
-      return false;
+      return;
 
     if (target == nullptr) {
       // If there isn't a current target create one.
@@ -348,7 +347,7 @@ class CommandObjectProcessAttach : public CommandObjectProcessLaunchOrAttach {
       target = new_target_sp.get();
       if (target == nullptr || error.Fail()) {
         result.AppendError(error.AsCString("Error creating target"));
-        return false;
+        return;
       }
     }
 
@@ -384,7 +383,7 @@ class CommandObjectProcessAttach : public CommandObjectProcessLaunchOrAttach {
     }
 
     if (!result.Succeeded())
-      return false;
+      return;
 
     // Okay, we're done.  Last step is to warn if the executable module has
     // changed:
@@ -429,8 +428,6 @@ class CommandObjectProcessAttach : public CommandObjectProcessLaunchOrAttach {
       ExecutionContext exe_ctx(process_sp);
       m_interpreter.HandleCommand("process continue", eLazyBoolNo, exe_ctx, result);
     }
-
-    return result.Succeeded();
   }
 
   CommandOptionsProcessAttach m_options;
@@ -504,8 +501,7 @@ class CommandObjectProcessContinue : public CommandObjectParsed {
     bool m_any_bkpts_specified = false;
   };
 
-
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Process *process = m_exe_ctx.GetProcessPtr();
     bool synchronous_execution = m_interpreter.GetSynchronous();
     StateType state = process->GetState();
@@ -543,13 +539,13 @@ class CommandObjectProcessContinue : public CommandObjectParsed {
           m_options.m_run_to_bkpt_args, target, result, &run_to_bkpt_ids,
           BreakpointName::Permissions::disablePerm);
       if (!result.Succeeded()) {
-        return false;
+        return;
       }
       result.Clear();
       if (m_options.m_any_bkpts_specified && run_to_bkpt_ids.GetSize() == 0) {
         result.AppendError("continue-to breakpoints did not specify any actual "
                            "breakpoints or locations");
-        return false;
+        return;
       }
 
       // First figure out which breakpoints & locations were specified by the
@@ -612,7 +608,7 @@ class CommandObjectProcessContinue : public CommandObjectParsed {
       if (!any_enabled) {
         result.AppendError("at least one of the continue-to breakpoints must "
                            "be enabled.");
-        return false;
+        return;
       }
 
       // Also, if you specify BOTH a breakpoint and one of its locations,
@@ -737,7 +733,6 @@ class CommandObjectProcessContinue : public CommandObjectParsed {
           "Process cannot be continued from its current state (%s).\n",
           StateAsCString(state));
     }
-    return result.Succeeded();
   }
 
   Options *GetOptions() override { return &m_options; }
@@ -809,7 +804,7 @@ class CommandObjectProcessDetach : public CommandObjectParsed {
   Options *GetOptions() override { return &m_options; }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Process *process = m_exe_ctx.GetProcessPtr();
     // FIXME: This will be a Command Option:
     bool keep_stopped;
@@ -826,9 +821,7 @@ class CommandObjectProcessDetach : public CommandObjectParsed {
       result.SetStatus(eReturnStatusSuccessFinishResult);
     } else {
       result.AppendErrorWithFormat("Detach failed: %s\n", error.AsCString());
-      return false;
     }
-    return result.Succeeded();
   }
 
   CommandOptions m_options;
@@ -894,12 +887,12 @@ class CommandObjectProcessConnect : public CommandObjectParsed {
   Options *GetOptions() override { return &m_options; }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     if (command.GetArgumentCount() != 1) {
       result.AppendErrorWithFormat(
           "'%s' takes exactly one argument:\nUsage: %s\n", m_cmd_name.c_str(),
           m_cmd_syntax.c_str());
-      return false;
+      return;
     }
 
     Process *process = m_exe_ctx.GetProcessPtr();
@@ -908,7 +901,7 @@ class CommandObjectProcessConnect : public CommandObjectParsed {
           "Process %" PRIu64
           " is currently being debugged, kill the process before connecting.\n",
          process->GetID());
-      return false;
+      return;
     }
 
     const char *plugin_name = nullptr;
@@ -929,9 +922,7 @@ class CommandObjectProcessConnect : public CommandObjectParsed {
                                         error);
     if (error.Fail() || process_sp == nullptr) {
       result.AppendError(error.AsCString("Error connecting to the process"));
-      return false;
     }
-    return true;
   }
 
   CommandOptions m_options;
@@ -1032,7 +1023,7 @@ class CommandObjectProcessLoad : public CommandObjectParsed {
   Options *GetOptions() override { return &m_options; }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Process *process = m_exe_ctx.GetProcessPtr();
 
     for (auto &entry : command.entries()) {
@@ -1071,7 +1062,6 @@ class CommandObjectProcessLoad : public CommandObjectParsed {
                                      error.AsCString());
       }
     }
-    return result.Succeeded();
   }
 
   CommandOptions m_options;
@@ -1115,7 +1105,7 @@ class CommandObjectProcessUnload : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Process *process = m_exe_ctx.GetProcessPtr();
 
     for (auto &entry : command.entries()) {
@@ -1138,7 +1128,6 @@ class CommandObjectProcessUnload : public CommandObjectParsed {
         }
       }
     }
-    return result.Succeeded();
   }
 };
 
@@ -1184,7 +1173,7 @@ class CommandObjectProcessSignal : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Process *process = m_exe_ctx.GetProcessPtr();
 
     if (command.GetArgumentCount() == 1) {
@@ -1214,7 +1203,6 @@ class CommandObjectProcessSignal : public CommandObjectParsed {
           "'%s' takes exactly one signal number argument:\nUsage: %s\n",
           m_cmd_name.c_str(), m_cmd_syntax.c_str());
     }
-    return result.Succeeded();
   }
 };
 
@@ -1233,11 +1221,11 @@ class CommandObjectProcessInterrupt : public CommandObjectParsed {
   ~CommandObjectProcessInterrupt() override = default;
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Process *process = m_exe_ctx.GetProcessPtr();
     if (process == nullptr) {
       result.AppendError("no process to halt");
-      return false;
+      return;
     }
 
     bool clear_thread_plans = true;
@@ -1248,7 +1236,6 @@ class CommandObjectProcessInterrupt : public CommandObjectParsed {
       result.AppendErrorWithFormat("Failed to halt process: %s\n",
                                    error.AsCString());
     }
-    return result.Succeeded();
   }
 };
 
@@ -1267,11 +1254,11 @@ class CommandObjectProcessKill : public CommandObjectParsed {
   ~CommandObjectProcessKill() override = default;
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
    Process *process = m_exe_ctx.GetProcessPtr();
    if (process == nullptr) {
      result.AppendError("no process to kill");
-      return false;
+      return;
    }
 
    Status error(process->Destroy(true));
@@ -1281,7 +1268,6 @@ class CommandObjectProcessKill : public CommandObjectParsed {
       result.AppendErrorWithFormat("Failed to kill process: %s\n",
                                    error.AsCString());
     }
-    return result.Succeeded();
   }
 };
 
@@ -1356,7 +1342,7 @@ class CommandObjectProcessSaveCore : public CommandObjectParsed {
   };
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     ProcessSP process_sp = m_exe_ctx.GetProcessSP();
     if (process_sp) {
       if (command.GetArgumentCount() == 1) {
@@ -1390,10 +1376,7 @@ class CommandObjectProcessSaveCore : public CommandObjectParsed {
       }
     } else {
       result.AppendError("invalid process");
-      return false;
     }
-
-    return result.Succeeded();
   }
 
   CommandOptions m_options;
@@ -1451,7 +1434,7 @@ class CommandObjectProcessStatus : public CommandObjectParsed {
   };
 
 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Stream &strm = result.GetOutputStream();
     result.SetStatus(eReturnStatusSuccessFinishNoResult);
 
@@ -1483,7 +1466,7 @@ class CommandObjectProcessStatus : public CommandObjectParsed {
       PlatformSP platform_sp = process->GetTarget().GetPlatform();
       if (!platform_sp) {
         result.AppendError("Couldn't retrieve the target's platform");
-        return result.Succeeded();
+        return;
       }
 
       auto expected_crash_info =
@@ -1491,7 +1474,7 @@ class CommandObjectProcessStatus : public CommandObjectParsed {
 
       if (!expected_crash_info) {
         result.AppendError(llvm::toString(expected_crash_info.takeError()));
-        return result.Succeeded();
+        return;
       }
 
       StructuredData::DictionarySP crash_info_sp = *expected_crash_info;
@@ -1502,8 +1485,6 @@ class CommandObjectProcessStatus : public CommandObjectParsed {
         crash_info_sp->GetDescription(strm);
       }
     }
-
-    return result.Succeeded();
   }
 
 private:
@@ -1676,7 +1657,7 @@ class CommandObjectProcessHandle : public CommandObjectParsed {
   }
 
 protected:
-  bool DoExecute(Args &signal_args, CommandReturnObject &result) override {
+  void DoExecute(Args &signal_args, CommandReturnObject &result) override {
     Target &target = GetSelectedOrDummyTarget();
 
     // Any signals that are being set should be added to the Target's
@@ -1693,28 +1674,28 @@ class CommandObjectProcessHandle : public CommandObjectParsed {
         !VerifyCommandOptionValue(m_options.stop, stop_action)) {
       result.AppendError("Invalid argument for command option --stop; must be "
                          "true or false.\n");
-      return false;
+      return;
     }
 
     if (!m_options.notify.empty() &&
         !VerifyCommandOptionValue(m_options.notify, notify_action)) {
       result.AppendError("Invalid argument for command option --notify; must "
                          "be true or false.\n");
-      return false;
+      return;
     }
 
     if (!m_options.pass.empty() &&
        !VerifyCommandOptionValue(m_options.pass, pass_action)) {
       result.AppendError("Invalid argument for command option --pass; must be "
                          "true or false.\n");
-      return false;
+      return;
     }
 
     bool no_actions = (stop_action == -1 && pass_action == -1 &&
                        notify_action == -1);
     if (m_options.only_target_values && !no_actions) {
       result.AppendError("-t is for reporting, not setting, target values.");
-      return false;
+      return;
     }
 
     size_t num_args = signal_args.GetArgumentCount();
@@ -1729,7 +1710,7 @@ class CommandObjectProcessHandle : public CommandObjectParsed {
     if (m_options.only_target_values) {
       target.PrintDummySignals(result.GetOutputStream(), signal_args);
       result.SetStatus(eReturnStatusSuccessFinishResult);
-      return true;
+      return;
     }
 
     // This handles clearing values:
@@ -1738,7 +1719,7 @@ class CommandObjectProcessHandle : public CommandObjectParsed {
       if (m_options.dummy)
         GetDummyTarget().ClearDummySignals(signal_args);
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
-      return true;
+      return;
     }
 
     // This rest handles setting values:
@@ -1774,7 +1755,7 @@ class CommandObjectProcessHandle : public CommandObjectParsed {
         if (llvm::to_integer(arg.c_str(), signo)) {
           result.AppendErrorWithFormat("Can't set signal handling by signal "
                                        "number with no process");
-          return false;
+          return;
         }
         num_signals_set = num_args;
       }
@@ -1831,8 +1812,6 @@ class CommandObjectProcessHandle : public CommandObjectParsed {
       result.SetStatus(eReturnStatusSuccessFinishResult);
     else
       result.SetStatus(eReturnStatusFailed);
-
-    return result.Succeeded();
   }
 
   CommandOptions m_options;
@@ -1873,7 +1852,7 @@ class CommandObjectProcessTraceStop : public CommandObjectParsed {
 
   ~CommandObjectProcessTraceStop() override = default;
 
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     ProcessSP process_sp = m_exe_ctx.GetProcessSP();
 
     TraceSP trace_sp = process_sp->GetTarget().GetTrace();
@@ -1882,8 +1861,6 @@ class CommandObjectProcessTraceStop : public CommandObjectParsed {
       result.AppendError(toString(std::move(err)));
     else
       result.SetStatus(eReturnStatusSuccessFinishResult);
-
-    return result.Succeeded();
   }
 };
 
diff --git a/lldb/source/Commands/CommandObjectQuit.cpp b/lldb/source/Commands/CommandObjectQuit.cpp
index 650cfca2c050a2..d7caf1546fb574 100644
--- a/lldb/source/Commands/CommandObjectQuit.cpp
+++ b/lldb/source/Commands/CommandObjectQuit.cpp
@@ -62,7 +62,7 @@ bool CommandObjectQuit::ShouldAskForConfirmation(bool &is_a_detach) {
   return should_prompt;
 }
 
-bool CommandObjectQuit::DoExecute(Args &command, CommandReturnObject &result) {
+void CommandObjectQuit::DoExecute(Args &command, CommandReturnObject &result) {
   bool is_a_detach = true;
   if (ShouldAskForConfirmation(is_a_detach)) {
     StreamString message;
@@ -71,14 +71,14 @@ bool CommandObjectQuit::DoExecute(Args &command, CommandReturnObject &result) {
                    (is_a_detach ? "detach from" : "kill"));
     if (!m_interpreter.Confirm(message.GetString(), true)) {
       result.SetStatus(eReturnStatusFailed);
-      return false;
+      return;
     }
   }
 
   if (command.GetArgumentCount() > 1) {
     result.AppendError("Too many arguments for 'quit'. Only an optional exit "
                        "code is allowed");
-    return false;
+    return;
   }
 
   // We parse the exit code argument if there is one.
@@ -90,12 +90,12 @@ bool CommandObjectQuit::DoExecute(Args &command, CommandReturnObject &result) {
       std::string arg_str = arg.str();
       s.Printf("Couldn't parse '%s' as integer for exit code.", arg_str.data());
       result.AppendError(s.GetString());
-      return false;
+      return;
    }
    if (!m_interpreter.SetQuitExitCode(exit_code)) {
      result.AppendError("The current driver doesn't allow custom exit codes"
                         " for the quit command.");
-      return false;
+      return;
    }
  }
 
@@ -103,6 +103,4 @@ bool CommandObjectQuit::DoExecute(Args &command, CommandReturnObject &result) {
       CommandInterpreter::eBroadcastBitQuitCommandReceived;
   m_interpreter.BroadcastEvent(event_type);
   result.SetStatus(eReturnStatusQuit);
-
-  return true;
 }
diff --git a/lldb/source/Commands/CommandObjectQuit.h b/lldb/source/Commands/CommandObjectQuit.h
index ccbd863cd6f5b4..c27c0d1da3b9e9 100644
--- a/lldb/source/Commands/CommandObjectQuit.h
+++ b/lldb/source/Commands/CommandObjectQuit.h
@@ -22,7 +22,7 @@ class CommandObjectQuit : public CommandObjectParsed {
   ~CommandObjectQuit() override;
 
 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override;
+  void DoExecute(Args &args, CommandReturnObject &result) override;
 
   bool ShouldAskForConfirmation(bool &is_a_detach);
 };
diff --git a/lldb/source/Commands/CommandObjectRegexCommand.cpp b/lldb/source/Commands/CommandObjectRegexCommand.cpp
index 6ff1d281504ac2..f638d707e17e78 100644
--- a/lldb/source/Commands/CommandObjectRegexCommand.cpp
+++ b/lldb/source/Commands/CommandObjectRegexCommand.cpp
@@ -54,7 +54,7 @@ llvm::Expected<std::string> CommandObjectRegexCommand::SubstituteVariables(
   return output.str();
 }
 
-bool CommandObjectRegexCommand::DoExecute(llvm::StringRef command,
+void CommandObjectRegexCommand::DoExecute(llvm::StringRef command,
                                           CommandReturnObject &result) {
   EntryCollection::const_iterator pos, end = m_entries.end();
   for (pos = m_entries.begin();
pos != end; ++pos) { @@ -64,7 +64,7 @@ bool CommandObjectRegexCommand::DoExecute(llvm::StringRef command, SubstituteVariables(pos->command, matches); if (!new_command) { result.SetError(new_command.takeError()); - return false; + return; } // Interpret the new command and return this as the result! @@ -73,8 +73,9 @@ bool CommandObjectRegexCommand::DoExecute(llvm::StringRef command, // We don't have to pass an override_context here, as the command that // called us should have set up the context appropriately. bool force_repeat_command = true; - return m_interpreter.HandleCommand(new_command->c_str(), eLazyBoolNo, - result, force_repeat_command); + m_interpreter.HandleCommand(new_command->c_str(), eLazyBoolNo, result, + force_repeat_command); + return; } } result.SetStatus(eReturnStatusFailed); @@ -85,7 +86,6 @@ bool CommandObjectRegexCommand::DoExecute(llvm::StringRef command, << "' failed to match any " "regular expression in the '" << m_cmd_name << "' regex "; - return false; } bool CommandObjectRegexCommand::AddRegexCommand(llvm::StringRef re_cstr, diff --git a/lldb/source/Commands/CommandObjectRegexCommand.h b/lldb/source/Commands/CommandObjectRegexCommand.h index 47d493a8fdd7bc..c78b0b586c3758 100644 --- a/lldb/source/Commands/CommandObjectRegexCommand.h +++ b/lldb/source/Commands/CommandObjectRegexCommand.h @@ -37,7 +37,7 @@ class CommandObjectRegexCommand : public CommandObjectRaw { void HandleCompletion(CompletionRequest &request) override; protected: - bool DoExecute(llvm::StringRef command, CommandReturnObject &result) override; + void DoExecute(llvm::StringRef command, CommandReturnObject &result) override; /// Substitute variables of the format %\d+ in the input string. static llvm::Expected SubstituteVariables( diff --git a/lldb/source/Commands/CommandObjectRegister.cpp b/lldb/source/Commands/CommandObjectRegister.cpp index 6e6071fd54606d..a4d53e5c8dd5f1 100644 --- a/lldb/source/Commands/CommandObjectRegister.cpp +++ b/lldb/source/Commands/CommandObjectRegister.cpp @@ -161,7 +161,7 @@ class CommandObjectRegisterRead : public CommandObjectParsed { } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Stream &strm = result.GetOutputStream(); RegisterContext *reg_ctx = m_exe_ctx.GetRegisterContext(); @@ -234,7 +234,6 @@ class CommandObjectRegisterRead : public CommandObjectParsed { } } } - return result.Succeeded(); } class CommandOptions : public OptionGroup { @@ -348,7 +347,7 @@ class CommandObjectRegisterWrite : public CommandObjectParsed { } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { DataExtractor reg_data; RegisterContext *reg_ctx = m_exe_ctx.GetRegisterContext(); @@ -378,7 +377,7 @@ class CommandObjectRegisterWrite : public CommandObjectParsed { // has been written. 
m_exe_ctx.GetThreadRef().Flush(); result.SetStatus(eReturnStatusSuccessFinishNoResult); - return true; + return; } } if (error.AsCString()) { @@ -396,7 +395,6 @@ class CommandObjectRegisterWrite : public CommandObjectParsed { reg_name.str().c_str()); } } - return result.Succeeded(); } }; @@ -447,10 +445,10 @@ different for the same register when connected to different debug servers.)"); } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { if (command.GetArgumentCount() != 1) { result.AppendError("register info takes exactly 1 argument: "); - return result.Succeeded(); + return; } llvm::StringRef reg_name = command[0].ref(); @@ -464,8 +462,6 @@ different for the same register when connected to different debug servers.)"); } else result.AppendErrorWithFormat("No register found with name '%s'.\n", reg_name.str().c_str()); - - return result.Succeeded(); } }; diff --git a/lldb/source/Commands/CommandObjectScript.cpp b/lldb/source/Commands/CommandObjectScript.cpp index 7e4f18a0d51063..25f25b8e65947e 100644 --- a/lldb/source/Commands/CommandObjectScript.cpp +++ b/lldb/source/Commands/CommandObjectScript.cpp @@ -65,14 +65,14 @@ CommandObjectScript::CommandObjectScript(CommandInterpreter &interpreter) CommandObjectScript::~CommandObjectScript() = default; -bool CommandObjectScript::DoExecute(llvm::StringRef command, +void CommandObjectScript::DoExecute(llvm::StringRef command, CommandReturnObject &result) { // Try parsing the language option but when the command contains a raw part // separated by the -- delimiter. OptionsWithRaw raw_args(command); if (raw_args.HasArgs()) { if (!ParseOptions(raw_args.GetArgs(), result)) - return false; + return; command = raw_args.GetRawPart(); } @@ -84,7 +84,7 @@ bool CommandObjectScript::DoExecute(llvm::StringRef command, if (language == lldb::eScriptLanguageNone) { result.AppendError( "the script-lang setting is set to none - scripting not available"); - return false; + return; } ScriptInterpreter *script_interpreter = @@ -92,7 +92,7 @@ bool CommandObjectScript::DoExecute(llvm::StringRef command, if (script_interpreter == nullptr) { result.AppendError("no script interpreter"); - return false; + return; } // Script might change Python code we use for formatting. Make sure we keep @@ -102,7 +102,7 @@ bool CommandObjectScript::DoExecute(llvm::StringRef command, if (command.empty()) { script_interpreter->ExecuteInterpreterLoop(); result.SetStatus(eReturnStatusSuccessFinishNoResult); - return result.Succeeded(); + return; } // We can do better when reporting the status of one-liner script execution. 
@@ -110,6 +110,4 @@ bool CommandObjectScript::DoExecute(llvm::StringRef command, result.SetStatus(eReturnStatusSuccessFinishNoResult); else result.SetStatus(eReturnStatusFailed); - - return result.Succeeded(); } diff --git a/lldb/source/Commands/CommandObjectScript.h b/lldb/source/Commands/CommandObjectScript.h index 9d164e864a8bc5..3a8c4a890404a1 100644 --- a/lldb/source/Commands/CommandObjectScript.h +++ b/lldb/source/Commands/CommandObjectScript.h @@ -31,7 +31,7 @@ class CommandObjectScript : public CommandObjectRaw { }; protected: - bool DoExecute(llvm::StringRef command, CommandReturnObject &result) override; + void DoExecute(llvm::StringRef command, CommandReturnObject &result) override; private: CommandOptions m_options; diff --git a/lldb/source/Commands/CommandObjectSession.cpp b/lldb/source/Commands/CommandObjectSession.cpp index 6bf1ec99c88887..d140bdfdba57b3 100644 --- a/lldb/source/Commands/CommandObjectSession.cpp +++ b/lldb/source/Commands/CommandObjectSession.cpp @@ -36,7 +36,7 @@ class CommandObjectSessionSave : public CommandObjectParsed { } protected: - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { llvm::StringRef file_path; if (!args.empty()) @@ -46,7 +46,6 @@ class CommandObjectSessionSave : public CommandObjectParsed { result.SetStatus(eReturnStatusSuccessFinishNoResult); else result.SetStatus(eReturnStatusFailed); - return result.Succeeded(); } }; @@ -127,7 +126,7 @@ class CommandObjectSessionHistory : public CommandObjectParsed { OptionValueBoolean m_clear; }; - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { if (m_options.m_clear.GetCurrentValue() && m_options.m_clear.OptionWasSet()) { m_interpreter.GetCommandHistory().Clear(); @@ -189,7 +188,6 @@ class CommandObjectSessionHistory : public CommandObjectParsed { stop_idx.second); } } - return result.Succeeded(); } CommandOptions m_options; diff --git a/lldb/source/Commands/CommandObjectSettings.cpp b/lldb/source/Commands/CommandObjectSettings.cpp index 7069cb1d83993c..5fb7dcc80279fd 100644 --- a/lldb/source/Commands/CommandObjectSettings.cpp +++ b/lldb/source/Commands/CommandObjectSettings.cpp @@ -169,27 +169,27 @@ insert-before or insert-after."); } protected: - bool DoExecute(llvm::StringRef command, + void DoExecute(llvm::StringRef command, CommandReturnObject &result) override { Args cmd_args(command); // Process possible options. if (!ParseOptions(cmd_args, result)) - return false; + return; const size_t min_argc = m_options.m_force ? 1 : 2; const size_t argc = cmd_args.GetArgumentCount(); if ((argc < min_argc) && (!m_options.m_global)) { result.AppendError("'settings set' takes more arguments"); - return false; + return; } const char *var_name = cmd_args.GetArgumentAtIndex(0); if ((var_name == nullptr) || (var_name[0] == '\0')) { result.AppendError( "'settings set' command requires a valid variable name"); - return false; + return; } // A missing value corresponds to clearing the setting when "force" is @@ -199,9 +199,8 @@ insert-before or insert-after."); &m_exe_ctx, eVarSetOperationClear, var_name, llvm::StringRef())); if (error.Fail()) { result.AppendError(error.AsCString()); - return false; } - return result.Succeeded(); + return; } // Split the raw command into var_name and value pair. 
@@ -227,11 +226,10 @@ insert-before or insert-after."); if (error.Fail() && !m_options.m_exists) { result.AppendError(error.AsCString()); - return false; + return; } result.SetStatus(eReturnStatusSuccessFinishResult); - return result.Succeeded(); } private: @@ -273,7 +271,7 @@ class CommandObjectSettingsShow : public CommandObjectParsed { } protected: - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { result.SetStatus(eReturnStatusSuccessFinishResult); if (!args.empty()) { @@ -291,8 +289,6 @@ class CommandObjectSettingsShow : public CommandObjectParsed { GetDebugger().DumpAllPropertyValues(&m_exe_ctx, result.GetOutputStream(), OptionValue::eDumpGroupValue); } - - return result.Succeeded(); } }; @@ -368,7 +364,7 @@ class CommandObjectSettingsWrite : public CommandObjectParsed { }; protected: - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { FileSpec file_spec(m_options.m_filename); FileSystem::Instance().Resolve(file_spec); std::string path(file_spec.GetPath()); @@ -383,7 +379,7 @@ class CommandObjectSettingsWrite : public CommandObjectParsed { if (!out_file.GetFile().IsValid()) { result.AppendErrorWithFormat("%s: unable to write to file", path.c_str()); - return false; + return; } // Exporting should not be context sensitive. @@ -392,7 +388,7 @@ class CommandObjectSettingsWrite : public CommandObjectParsed { if (args.empty()) { GetDebugger().DumpAllPropertyValues(&clean_ctx, out_file, OptionValue::eDumpGroupExport); - return result.Succeeded(); + return; } for (const auto &arg : args) { @@ -402,8 +398,6 @@ class CommandObjectSettingsWrite : public CommandObjectParsed { result.AppendError(error.AsCString()); } } - - return result.Succeeded(); } private: @@ -461,7 +455,7 @@ class CommandObjectSettingsRead : public CommandObjectParsed { }; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { FileSpec file(m_options.m_filename); FileSystem::Instance().Resolve(file); CommandInterpreterRunOptions options; @@ -471,7 +465,6 @@ class CommandObjectSettingsRead : public CommandObjectParsed { options.SetPrintErrors(true); options.SetStopOnError(false); m_interpreter.HandleCommandsFromFile(file, options, result); - return result.Succeeded(); } private: @@ -517,7 +510,7 @@ class CommandObjectSettingsList : public CommandObjectParsed { } protected: - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { result.SetStatus(eReturnStatusSuccessFinishResult); const size_t argc = args.GetArgumentCount(); @@ -543,8 +536,6 @@ class CommandObjectSettingsList : public CommandObjectParsed { GetDebugger().DumpAllDescriptions(m_interpreter, result.GetOutputStream()); } - - return result.Succeeded(); } }; @@ -601,7 +592,7 @@ class CommandObjectSettingsRemove : public CommandObjectRaw { } protected: - bool DoExecute(llvm::StringRef command, + void DoExecute(llvm::StringRef command, CommandReturnObject &result) override { result.SetStatus(eReturnStatusSuccessFinishNoResult); @@ -609,7 +600,7 @@ class CommandObjectSettingsRemove : public CommandObjectRaw { // Process possible options. 
if (!ParseOptions(cmd_args, result)) - return false; + return; const size_t argc = cmd_args.GetArgumentCount(); if (argc == 0) { @@ -617,14 +608,14 @@ class CommandObjectSettingsRemove : public CommandObjectRaw { "or an array followed by one or more indexes, or a " "dictionary followed by one or more key names to " "remove"); - return false; + return; } const char *var_name = cmd_args.GetArgumentAtIndex(0); if ((var_name == nullptr) || (var_name[0] == '\0')) { result.AppendError( "'settings remove' command requires a valid variable name"); - return false; + return; } // Split the raw command into var_name and value pair. @@ -635,10 +626,7 @@ class CommandObjectSettingsRemove : public CommandObjectRaw { &m_exe_ctx, eVarSetOperationRemove, var_name, var_value)); if (error.Fail()) { result.AppendError(error.AsCString()); - return false; } - - return result.Succeeded(); } }; @@ -709,7 +697,7 @@ class CommandObjectSettingsReplace : public CommandObjectRaw { } protected: - bool DoExecute(llvm::StringRef command, + void DoExecute(llvm::StringRef command, CommandReturnObject &result) override { result.SetStatus(eReturnStatusSuccessFinishNoResult); @@ -718,7 +706,7 @@ class CommandObjectSettingsReplace : public CommandObjectRaw { if ((var_name == nullptr) || (var_name[0] == '\0')) { result.AppendError("'settings replace' command requires a valid variable " "name; No value supplied"); - return false; + return; } // Split the raw command into var_name, index_value, and value triple. @@ -729,12 +717,9 @@ class CommandObjectSettingsReplace : public CommandObjectRaw { &m_exe_ctx, eVarSetOperationReplace, var_name, var_value)); if (error.Fail()) { result.AppendError(error.AsCString()); - return false; } else { result.SetStatus(eReturnStatusSuccessFinishNoResult); } - - return result.Succeeded(); } }; @@ -801,7 +786,7 @@ class CommandObjectSettingsInsertBefore : public CommandObjectRaw { } protected: - bool DoExecute(llvm::StringRef command, + void DoExecute(llvm::StringRef command, CommandReturnObject &result) override { result.SetStatus(eReturnStatusSuccessFinishNoResult); @@ -810,14 +795,14 @@ class CommandObjectSettingsInsertBefore : public CommandObjectRaw { if (argc < 3) { result.AppendError("'settings insert-before' takes more arguments"); - return false; + return; } const char *var_name = cmd_args.GetArgumentAtIndex(0); if ((var_name == nullptr) || (var_name[0] == '\0')) { result.AppendError("'settings insert-before' command requires a valid " "variable name; No value supplied"); - return false; + return; } // Split the raw command into var_name, index_value, and value triple. 
@@ -828,10 +813,7 @@ class CommandObjectSettingsInsertBefore : public CommandObjectRaw { &m_exe_ctx, eVarSetOperationInsertBefore, var_name, var_value)); if (error.Fail()) { result.AppendError(error.AsCString()); - return false; } - - return result.Succeeded(); } }; @@ -897,7 +879,7 @@ class CommandObjectSettingsInsertAfter : public CommandObjectRaw { } protected: - bool DoExecute(llvm::StringRef command, + void DoExecute(llvm::StringRef command, CommandReturnObject &result) override { result.SetStatus(eReturnStatusSuccessFinishNoResult); @@ -906,14 +888,14 @@ class CommandObjectSettingsInsertAfter : public CommandObjectRaw { if (argc < 3) { result.AppendError("'settings insert-after' takes more arguments"); - return false; + return; } const char *var_name = cmd_args.GetArgumentAtIndex(0); if ((var_name == nullptr) || (var_name[0] == '\0')) { result.AppendError("'settings insert-after' command requires a valid " "variable name; No value supplied"); - return false; + return; } // Split the raw command into var_name, index_value, and value triple. @@ -924,10 +906,7 @@ class CommandObjectSettingsInsertAfter : public CommandObjectRaw { &m_exe_ctx, eVarSetOperationInsertAfter, var_name, var_value)); if (error.Fail()) { result.AppendError(error.AsCString()); - return false; } - - return result.Succeeded(); } }; @@ -982,7 +961,7 @@ class CommandObjectSettingsAppend : public CommandObjectRaw { } protected: - bool DoExecute(llvm::StringRef command, + void DoExecute(llvm::StringRef command, CommandReturnObject &result) override { result.SetStatus(eReturnStatusSuccessFinishNoResult); Args cmd_args(command); @@ -990,14 +969,14 @@ class CommandObjectSettingsAppend : public CommandObjectRaw { if (argc < 2) { result.AppendError("'settings append' takes more arguments"); - return false; + return; } const char *var_name = cmd_args.GetArgumentAtIndex(0); if ((var_name == nullptr) || (var_name[0] == '\0')) { result.AppendError("'settings append' command requires a valid variable " "name; No value supplied"); - return false; + return; } // Do not perform cmd_args.Shift() since StringRef is manipulating the raw @@ -1011,10 +990,7 @@ class CommandObjectSettingsAppend : public CommandObjectRaw { &m_exe_ctx, eVarSetOperationAppend, var_name, var_value)); if (error.Fail()) { result.AppendError(error.AsCString()); - return false; } - - return result.Succeeded(); } }; @@ -1089,39 +1065,36 @@ class CommandObjectSettingsClear : public CommandObjectParsed { }; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { result.SetStatus(eReturnStatusSuccessFinishNoResult); const size_t argc = command.GetArgumentCount(); if (m_options.m_clear_all) { if (argc != 0) { result.AppendError("'settings clear --all' doesn't take any arguments"); - return false; + return; } GetDebugger().GetValueProperties()->Clear(); - return result.Succeeded(); + return; } if (argc != 1) { result.AppendError("'settings clear' takes exactly one argument"); - return false; + return; } const char *var_name = command.GetArgumentAtIndex(0); if ((var_name == nullptr) || (var_name[0] == '\0')) { result.AppendError("'settings clear' command requires a valid variable " "name; No value supplied"); - return false; + return; } Status error(GetDebugger().SetPropertyValue( &m_exe_ctx, eVarSetOperationClear, var_name, llvm::StringRef())); if (error.Fail()) { result.AppendError(error.AsCString()); - return false; } - - return result.Succeeded(); } private: diff --git 
a/lldb/source/Commands/CommandObjectSource.cpp b/lldb/source/Commands/CommandObjectSource.cpp index 16452c1784bd67..db158a7f526305 100644 --- a/lldb/source/Commands/CommandObjectSource.cpp +++ b/lldb/source/Commands/CommandObjectSource.cpp @@ -532,14 +532,14 @@ class CommandObjectSourceInfo : public CommandObjectParsed { return true; } - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = m_exe_ctx.GetTargetPtr(); if (target == nullptr) { target = GetDebugger().GetSelectedTarget().get(); if (target == nullptr) { result.AppendError("invalid target, create a debug target using the " "'target create' command."); - return false; + return; } } @@ -562,11 +562,11 @@ class CommandObjectSourceInfo : public CommandObjectParsed { } if (!m_module_list.GetSize()) { result.AppendError("No modules match the input."); - return false; + return; } } else if (target->GetImages().GetSize() == 0) { result.AppendError("The target has no associated executable images."); - return false; + return; } // Check the arguments to see what lines we should dump. @@ -595,7 +595,6 @@ class CommandObjectSourceInfo : public CommandObjectParsed { else result.SetStatus(eReturnStatusFailed); } - return result.Succeeded(); } CommandOptions m_options; @@ -910,7 +909,7 @@ class CommandObjectSourceList : public CommandObjectParsed { } } - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = m_exe_ctx.GetTargetPtr(); if (!m_options.symbol_name.empty()) { @@ -939,7 +938,7 @@ class CommandObjectSourceList : public CommandObjectParsed { if (sc_list.GetSize() == 0) { result.AppendErrorWithFormat("Could not find function named: \"%s\".\n", m_options.symbol_name.c_str()); - return false; + return; } std::set source_match_set; @@ -958,7 +957,7 @@ class CommandObjectSourceList : public CommandObjectParsed { result.SetStatus(eReturnStatusSuccessFinishResult); else result.SetStatus(eReturnStatusFailed); - return result.Succeeded(); + return; } else if (m_options.address != LLDB_INVALID_ADDRESS) { Address so_addr; StreamString error_strm; @@ -987,7 +986,7 @@ class CommandObjectSourceList : public CommandObjectParsed { "no modules have source information for file address 0x%" PRIx64 ".\n", m_options.address); - return false; + return; } } else { // The target has some things loaded, resolve this address to a compile @@ -1009,7 +1008,7 @@ class CommandObjectSourceList : public CommandObjectParsed { "is no line table information " "available for this address.\n", error_strm.GetData()); - return false; + return; } } } @@ -1018,7 +1017,7 @@ class CommandObjectSourceList : public CommandObjectParsed { result.AppendErrorWithFormat( "no modules contain load address 0x%" PRIx64 ".\n", m_options.address); - return false; + return; } } for (const SymbolContext &sc : sc_list) { @@ -1134,7 +1133,7 @@ class CommandObjectSourceList : public CommandObjectParsed { if (num_matches == 0) { result.AppendErrorWithFormat("Could not find source file \"%s\".\n", m_options.file_name.c_str()); - return false; + return; } if (num_matches > 1) { @@ -1155,7 +1154,7 @@ class CommandObjectSourceList : public CommandObjectParsed { result.AppendErrorWithFormat( "Multiple source files found matching: \"%s.\"\n", m_options.file_name.c_str()); - return false; + return; } } @@ -1184,11 +1183,9 @@ class CommandObjectSourceList : public CommandObjectParsed { } else { 
result.AppendErrorWithFormat("No comp unit found for: \"%s.\"\n", m_options.file_name.c_str()); - return false; } } } - return result.Succeeded(); } const SymbolContextList *GetBreakpointLocations() { @@ -1213,7 +1210,7 @@ class CommandObjectSourceCacheDump : public CommandObjectParsed { ~CommandObjectSourceCacheDump() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { // Dump the debugger source cache. result.GetOutputStream() << "Debugger Source File Cache\n"; SourceManager::SourceFileCache &cache = GetDebugger().GetSourceFileCache(); @@ -1227,7 +1224,6 @@ class CommandObjectSourceCacheDump : public CommandObjectParsed { } result.SetStatus(eReturnStatusSuccessFinishResult); - return result.Succeeded(); } }; @@ -1240,7 +1236,7 @@ class CommandObjectSourceCacheClear : public CommandObjectParsed { ~CommandObjectSourceCacheClear() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { // Clear the debugger cache. SourceManager::SourceFileCache &cache = GetDebugger().GetSourceFileCache(); cache.Clear(); @@ -1250,7 +1246,6 @@ class CommandObjectSourceCacheClear : public CommandObjectParsed { process_sp->GetSourceFileCache().Clear(); result.SetStatus(eReturnStatusSuccessFinishNoResult); - return result.Succeeded(); } }; diff --git a/lldb/source/Commands/CommandObjectStats.cpp b/lldb/source/Commands/CommandObjectStats.cpp index b0b497cd80ba87..262de0bda144a6 100644 --- a/lldb/source/Commands/CommandObjectStats.cpp +++ b/lldb/source/Commands/CommandObjectStats.cpp @@ -26,15 +26,14 @@ class CommandObjectStatsEnable : public CommandObjectParsed { ~CommandObjectStatsEnable() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { if (DebuggerStats::GetCollectingStats()) { result.AppendError("statistics already enabled"); - return false; + return; } DebuggerStats::SetCollectingStats(true); result.SetStatus(eReturnStatusSuccessFinishResult); - return true; } }; @@ -48,15 +47,14 @@ class CommandObjectStatsDisable : public CommandObjectParsed { ~CommandObjectStatsDisable() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { if (!DebuggerStats::GetCollectingStats()) { result.AppendError("need to enable statistics before disabling them"); - return false; + return; } DebuggerStats::SetCollectingStats(false); result.SetStatus(eReturnStatusSuccessFinishResult); - return true; } }; @@ -105,7 +103,7 @@ class CommandObjectStatsDump : public CommandObjectParsed { Options *GetOptions() override { return &m_options; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = nullptr; if (!m_options.m_all_targets) target = m_exe_ctx.GetTargetPtr(); @@ -113,7 +111,6 @@ class CommandObjectStatsDump : public CommandObjectParsed { result.AppendMessageWithFormatv( "{0:2}", DebuggerStats::ReportStatistics(GetDebugger(), target)); result.SetStatus(eReturnStatusSuccessFinishResult); - return true; } CommandOptions m_options; diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index 
7c20893db243c9..c84a6550d6c75c 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -263,7 +263,7 @@ class CommandObjectTargetCreate : public CommandObjectParsed { } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); FileSpec core_file(m_core_file.GetOptionValue().GetCurrentValue()); FileSpec remote_file(m_remote_file.GetOptionValue().GetCurrentValue()); @@ -276,7 +276,7 @@ class CommandObjectTargetCreate : public CommandObjectParsed { result.AppendErrorWithFormatv("Cannot open '{0}': {1}.", core_file.GetPath(), llvm::toString(file.takeError())); - return false; + return; } } @@ -290,7 +290,7 @@ class CommandObjectTargetCreate : public CommandObjectParsed { result.AppendErrorWithFormatv("Cannot open '{0}': {1}.", symfile.GetPath(), llvm::toString(file.takeError())); - return false; + return; } } @@ -310,7 +310,7 @@ class CommandObjectTargetCreate : public CommandObjectParsed { if (!target_sp) { result.AppendError(error.AsCString()); - return false; + return; } const llvm::StringRef label = @@ -318,7 +318,7 @@ class CommandObjectTargetCreate : public CommandObjectParsed { if (!label.empty()) { if (auto E = target_sp->SetLabel(label)) result.SetError(std::move(E)); - return false; + return; } auto on_error = llvm::make_scope_exit( @@ -353,7 +353,7 @@ class CommandObjectTargetCreate : public CommandObjectParsed { Status err = platform_sp->PutFile(file_spec, remote_file); if (err.Fail()) { result.AppendError(err.AsCString()); - return false; + return; } } } else { @@ -367,7 +367,7 @@ class CommandObjectTargetCreate : public CommandObjectParsed { Status err = platform_sp->GetFile(remote_file, file_spec); if (err.Fail()) { result.AppendError(err.AsCString()); - return false; + return; } } else { // If the remote file exists, we can debug reading that out of @@ -381,12 +381,12 @@ class CommandObjectTargetCreate : public CommandObjectParsed { if (platform_sp->IsHost()) { result.AppendError("Supply a local file, not a remote file, " "when debugging on the host."); - return false; + return; } if (platform_sp->IsConnected() && !platform_sp->GetFileExists(remote_file)) { result.AppendError("remote --> local transfer without local " "path is not implemented yet"); - return false; + return; } // Since there's only a remote file, we need to set the executable // file spec to the remote one. 
@@ -397,7 +397,7 @@ class CommandObjectTargetCreate : public CommandObjectParsed { } } else { result.AppendError("no platform found for target"); - return false; + return; } } @@ -438,7 +438,7 @@ class CommandObjectTargetCreate : public CommandObjectParsed { if (error.Fail()) { result.AppendError( error.AsCString("can't find plug-in for core file")); - return false; + return; } else { result.AppendMessageWithFormatv( "Core file '{0}' ({1}) was loaded.\n", core_file.GetPath(), @@ -464,8 +464,6 @@ class CommandObjectTargetCreate : public CommandObjectParsed { "argument, or use the --core option.\n", m_cmd_name.c_str()); } - - return result.Succeeded(); } private: @@ -492,7 +490,7 @@ class CommandObjectTargetList : public CommandObjectParsed { ~CommandObjectTargetList() override = default; protected: - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { Stream &strm = result.GetOutputStream(); bool show_stopped_process_status = false; @@ -501,7 +499,6 @@ class CommandObjectTargetList : public CommandObjectParsed { strm.PutCString("No targets.\n"); } result.SetStatus(eReturnStatusSuccessFinishResult); - return result.Succeeded(); } }; @@ -520,7 +517,7 @@ class CommandObjectTargetSelect : public CommandObjectParsed { ~CommandObjectTargetSelect() override = default; protected: - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { if (args.GetArgumentCount() == 1) { const char *target_identifier = args.GetArgumentAtIndex(0); uint32_t target_idx = LLDB_INVALID_INDEX32; @@ -570,7 +567,6 @@ class CommandObjectTargetSelect : public CommandObjectParsed { result.AppendError( "'target select' takes a single argument: a target index\n"); } - return result.Succeeded(); } }; @@ -606,7 +602,7 @@ class CommandObjectTargetDelete : public CommandObjectParsed { Options *GetOptions() override { return &m_option_group; } protected: - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { const size_t argc = args.GetArgumentCount(); std::vector delete_target_list; TargetList &target_list = GetDebugger().GetTargetList(); @@ -620,7 +616,7 @@ class CommandObjectTargetDelete : public CommandObjectParsed { // Bail out if don't have any targets. 
if (num_targets == 0) { result.AppendError("no targets to delete"); - return false; + return; } for (auto &entry : args.entries()) { @@ -628,7 +624,7 @@ class CommandObjectTargetDelete : public CommandObjectParsed { if (entry.ref().getAsInteger(0, target_idx)) { result.AppendErrorWithFormat("invalid target index '%s'\n", entry.c_str()); - return false; + return; } if (target_idx < num_targets) { target_sp = target_list.GetTargetAtIndex(target_idx); @@ -646,13 +642,13 @@ class CommandObjectTargetDelete : public CommandObjectParsed { "target index %u is out of range, the only valid index is 0\n", target_idx); - return false; + return; } } else { target_sp = target_list.GetSelectedTarget(); if (!target_sp) { result.AppendErrorWithFormat("no target is currently selected\n"); - return false; + return; } delete_target_list.push_back(target_sp); } @@ -673,7 +669,7 @@ class CommandObjectTargetDelete : public CommandObjectParsed { (uint32_t)num_targets_to_delete); result.SetStatus(eReturnStatusSuccessFinishResult); - return true; + return; } OptionGroupOptions m_option_group; @@ -694,7 +690,7 @@ class CommandObjectTargetShowLaunchEnvironment : public CommandObjectParsed { ~CommandObjectTargetShowLaunchEnvironment() override = default; protected: - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { Target *target = m_exe_ctx.GetTargetPtr(); Environment env = target->GetEnvironment(); @@ -712,7 +708,6 @@ class CommandObjectTargetShowLaunchEnvironment : public CommandObjectParsed { strm.Format("{0}={1}\n", KV->first(), KV->second); result.SetStatus(eReturnStatusSuccessFinishResult); - return result.Succeeded(); } }; @@ -865,7 +860,7 @@ class CommandObjectTargetVariable : public CommandObjectParsed { } } - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { Target *target = m_exe_ctx.GetTargetPtr(); const size_t argc = args.GetArgumentCount(); Stream &s = result.GetOutputStream(); @@ -882,7 +877,7 @@ class CommandObjectTargetVariable : public CommandObjectParsed { if (!regex.IsValid()) { result.GetErrorStream().Printf( "error: invalid regular expression: '%s'\n", arg.c_str()); - return false; + return; } use_var_name = true; target->GetImages().FindGlobalVariables(regex, UINT32_MAX, @@ -898,7 +893,7 @@ class CommandObjectTargetVariable : public CommandObjectParsed { if (matches == 0) { result.AppendErrorWithFormat("can't find global variable '%s'", arg.c_str()); - return false; + return; } else { for (uint32_t global_idx = 0; global_idx < matches; ++global_idx) { VariableSP var_sp(variable_list.GetVariableAtIndex(global_idx)); @@ -1016,8 +1011,6 @@ class CommandObjectTargetVariable : public CommandObjectParsed { m_interpreter.PrintWarningsIfNecessary(result.GetOutputStream(), m_cmd_name); - - return result.Succeeded(); } OptionGroupOptions m_option_group; @@ -1064,7 +1057,7 @@ class CommandObjectTargetModulesSearchPathsAdd : public CommandObjectParsed { ~CommandObjectTargetModulesSearchPathsAdd() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = &GetSelectedTarget(); const size_t argc = command.GetArgumentCount(); if (argc & 1) { @@ -1094,7 +1087,6 @@ class CommandObjectTargetModulesSearchPathsAdd : public CommandObjectParsed { } } } - return result.Succeeded(); } }; @@ -1112,12 +1104,11 @@ class 
CommandObjectTargetModulesSearchPathsClear : public CommandObjectParsed { ~CommandObjectTargetModulesSearchPathsClear() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = &GetSelectedTarget(); bool notify = true; target->GetImageSearchPathList().Clear(notify); result.SetStatus(eReturnStatusSuccessFinishNoResult); - return result.Succeeded(); } }; @@ -1187,7 +1178,7 @@ class CommandObjectTargetModulesSearchPathsInsert : public CommandObjectParsed { } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = &GetSelectedTarget(); size_t argc = command.GetArgumentCount(); // check for at least 3 arguments and an odd number of parameters @@ -1198,7 +1189,7 @@ class CommandObjectTargetModulesSearchPathsInsert : public CommandObjectParsed { result.AppendErrorWithFormat( " parameter is not an integer: '%s'.\n", command.GetArgumentAtIndex(0)); - return result.Succeeded(); + return; } // shift off the index @@ -1219,14 +1210,12 @@ class CommandObjectTargetModulesSearchPathsInsert : public CommandObjectParsed { result.AppendError(" can't be empty\n"); else result.AppendError(" can't be empty\n"); - return false; + return; } } } else { result.AppendError("insert requires at least three arguments\n"); - return result.Succeeded(); } - return result.Succeeded(); } }; @@ -1244,12 +1233,11 @@ class CommandObjectTargetModulesSearchPathsList : public CommandObjectParsed { ~CommandObjectTargetModulesSearchPathsList() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = &GetSelectedTarget(); target->GetImageSearchPathList().Dump(&result.GetOutputStream()); result.SetStatus(eReturnStatusSuccessFinishResult); - return result.Succeeded(); } }; @@ -1280,11 +1268,11 @@ class CommandObjectTargetModulesSearchPathsQuery : public CommandObjectParsed { ~CommandObjectTargetModulesSearchPathsQuery() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = &GetSelectedTarget(); if (command.GetArgumentCount() != 1) { result.AppendError("query requires one argument\n"); - return result.Succeeded(); + return; } ConstString orig(command.GetArgumentAtIndex(0)); @@ -1295,7 +1283,6 @@ class CommandObjectTargetModulesSearchPathsQuery : public CommandObjectParsed { result.GetOutputStream().Printf("%s\n", orig.GetCString()); result.SetStatus(eReturnStatusSuccessFinishResult); - return result.Succeeded(); } }; @@ -1962,7 +1949,7 @@ class CommandObjectTargetModulesDumpObjfile ~CommandObjectTargetModulesDumpObjfile() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = &GetSelectedTarget(); uint32_t addr_byte_size = target->GetArchitecture().GetAddressByteSize(); @@ -2001,7 +1988,6 @@ class CommandObjectTargetModulesDumpObjfile } else { result.AppendError("no matching executable images found"); } - return result.Succeeded(); } }; @@ -2064,7 +2050,7 @@ class CommandObjectTargetModulesDumpSymtab }; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + 
void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = &GetSelectedTarget(); uint32_t num_dumped = 0; Mangled::NamePreference name_preference = @@ -2100,7 +2086,7 @@ class CommandObjectTargetModulesDumpSymtab } } else { result.AppendError("the target has no associated executable images"); - return false; + return; } } else { // Dump specified images (by basename or fullpath) @@ -2140,7 +2126,6 @@ class CommandObjectTargetModulesDumpSymtab else { result.AppendError("no matching executable images found"); } - return result.Succeeded(); } CommandOptions m_options; @@ -2163,7 +2148,7 @@ class CommandObjectTargetModulesDumpSections ~CommandObjectTargetModulesDumpSections() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = &GetSelectedTarget(); uint32_t num_dumped = 0; @@ -2176,7 +2161,7 @@ class CommandObjectTargetModulesDumpSections const size_t num_modules = target->GetImages().GetSize(); if (num_modules == 0) { result.AppendError("the target has no associated executable images"); - return false; + return; } result.GetOutputStream().Format("Dumping sections for {0} modules.\n", @@ -2231,7 +2216,6 @@ class CommandObjectTargetModulesDumpSections else { result.AppendError("no matching executable images found"); } - return result.Succeeded(); } }; @@ -2249,11 +2233,11 @@ class CommandObjectTargetModulesDumpClangPCMInfo : public CommandObjectParsed { ~CommandObjectTargetModulesDumpClangPCMInfo() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { if (command.GetArgumentCount() != 1) { result.AppendErrorWithFormat("'%s' takes exactly one pcm path argument.", m_cmd_name.c_str()); - return false; + return; } const char *pcm_path = command.GetArgumentAtIndex(0); @@ -2261,12 +2245,12 @@ class CommandObjectTargetModulesDumpClangPCMInfo : public CommandObjectParsed { if (pcm_file.GetFileNameExtension() != ".pcm") { result.AppendError("file must have a .pcm extension"); - return false; + return; } if (!FileSystem::Instance().Exists(pcm_file)) { result.AppendError("pcm file does not exist"); - return false; + return; } clang::CompilerInstance compiler; @@ -2286,8 +2270,6 @@ class CommandObjectTargetModulesDumpClangPCMInfo : public CommandObjectParsed { if (compiler.ExecuteAction(dump_module_info)) result.SetStatus(eReturnStatusSuccessFinishResult); - - return result.Succeeded(); } }; @@ -2308,14 +2290,14 @@ class CommandObjectTargetModulesDumpClangAST ~CommandObjectTargetModulesDumpClangAST() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = &GetSelectedTarget(); const ModuleList &module_list = target->GetImages(); const size_t num_modules = module_list.GetSize(); if (num_modules == 0) { result.AppendError("the target has no associated executable images"); - return false; + return; } if (command.GetArgumentCount() == 0) { @@ -2329,7 +2311,7 @@ class CommandObjectTargetModulesDumpClangAST sf->DumpClangAST(result.GetOutputStream()); } result.SetStatus(eReturnStatusSuccessFinishResult); - return true; + return; } // Dump specified ASTs (by basename or fullpath) @@ -2359,7 +2341,6 @@ class CommandObjectTargetModulesDumpClangAST } } 
result.SetStatus(eReturnStatusSuccessFinishResult); - return true; } }; @@ -2380,7 +2361,7 @@ class CommandObjectTargetModulesDumpSymfile ~CommandObjectTargetModulesDumpSymfile() override = default; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = &GetSelectedTarget(); uint32_t num_dumped = 0; @@ -2395,7 +2376,7 @@ class CommandObjectTargetModulesDumpSymfile const size_t num_modules = target_modules.GetSize(); if (num_modules == 0) { result.AppendError("the target has no associated executable images"); - return false; + return; } result.GetOutputStream().Format( "Dumping debug symbols for {0} modules.\n", num_modules); @@ -2440,7 +2421,6 @@ class CommandObjectTargetModulesDumpSymfile else { result.AppendError("no matching executable images found"); } - return result.Succeeded(); } }; @@ -2464,7 +2444,7 @@ class CommandObjectTargetModulesDumpLineTable Options *GetOptions() override { return &m_options; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = m_exe_ctx.GetTargetPtr(); uint32_t total_num_dumped = 0; @@ -2474,7 +2454,7 @@ class CommandObjectTargetModulesDumpLineTable if (command.GetArgumentCount() == 0) { result.AppendError("file option must be specified."); - return result.Succeeded(); + return; } else { // Dump specified images (by basename or fullpath) const char *arg_cstr; @@ -2516,7 +2496,6 @@ class CommandObjectTargetModulesDumpLineTable else { result.AppendError("no source filenames matched any command arguments"); } - return result.Succeeded(); } class CommandOptions : public Options { @@ -2601,7 +2580,7 @@ class CommandObjectTargetModulesDumpSeparateDebugInfoFiles }; protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target &target = GetSelectedTarget(); uint32_t num_dumped = 0; @@ -2617,7 +2596,7 @@ class CommandObjectTargetModulesDumpSeparateDebugInfoFiles const size_t num_modules = target_modules.GetSize(); if (num_modules == 0) { result.AppendError("the target has no associated executable images"); - return false; + return; } for (ModuleSP module_sp : target_modules.ModulesNoLocking()) { if (INTERRUPT_REQUESTED( @@ -2711,7 +2690,6 @@ class CommandObjectTargetModulesDumpSeparateDebugInfoFiles } else { result.AppendError("no matching executable images found"); } - return result.Succeeded(); } CommandOptions m_options; @@ -2800,7 +2778,7 @@ class CommandObjectTargetModulesAdd : public CommandObjectParsed { OptionGroupUUID m_uuid_option_group; OptionGroupFile m_symbol_file; - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { Target *target = &GetSelectedTarget(); bool flush = false; @@ -2820,7 +2798,7 @@ class CommandObjectTargetModulesAdd : public CommandObjectParsed { target->GetOrCreateModule(module_spec, true /* notify */)); if (module_sp) { result.SetStatus(eReturnStatusSuccessFinishResult); - return true; + return; } else { StreamString strm; module_spec.GetUUID().Dump(strm); @@ -2843,7 +2821,7 @@ class CommandObjectTargetModulesAdd : public CommandObjectParsed { "or symbol file with UUID %s", strm.GetData()); } - return false; + return; } } else { StreamString strm; @@ -2852,12 +2830,12 @@ class CommandObjectTargetModulesAdd : public 
CommandObjectParsed { "Unable to locate the executable or symbol file with UUID %s", strm.GetData()); result.SetError(error); - return false; + return; } } else { result.AppendError( "one or more executable image paths must be specified"); - return false; + return; } } else { for (auto &entry : args.entries()) { @@ -2885,7 +2863,7 @@ class CommandObjectTargetModulesAdd : public CommandObjectParsed { else result.AppendErrorWithFormat("unsupported module: %s", entry.c_str()); - return false; + return; } else { flush = true; } @@ -2910,8 +2888,6 @@ class CommandObjectTargetModulesAdd : public CommandObjectParsed { if (process) process->Flush(); } - - return result.Succeeded(); } }; @@ -2952,7 +2928,7 @@ class CommandObjectTargetModulesLoad Options *GetOptions() override { return &m_option_group; } protected: - bool DoExecute(Args &args, CommandReturnObject &result) override { + void DoExecute(Args &args, CommandReturnObject &result) override { Target *target = &GetSelectedTarget(); const bool load = m_load_option.GetOptionValue().GetCurrentValue(); const bool set_pc = m_pc_option.GetOptionValue().GetCurrentValue(); @@ -3025,14 +3001,14 @@ class CommandObjectTargetModulesLoad } else { result.AppendError("one or more section name + load " "address pair must be specified"); - return false; + return; } } else { if (m_slide_option.GetOptionValue().OptionWasSet()) { result.AppendError("The \"--slide \" option can't " "be used in conjunction with setting " "section load addresses.\n"); - return false; + return; } for (size_t i = 0; i < argc; i += 2) { @@ -3094,22 +3070,22 @@ class CommandObjectTargetModulesLoad Address file_entry = objfile->GetEntryPointAddress(); if (!process) { result.AppendError("No process"); - return false; + return; } if (set_pc && !file_entry.IsValid()) { result.AppendError("No entry address in object file"); - return false; + return; } std::vector loadables( objfile->GetLoadableData(*target)); if (loadables.size() == 0) { result.AppendError("No loadable sections"); - return false; + return; } Status error = process->WriteObjectFile(std::move(loadables)); if (error.Fail()) { result.AppendError(error.AsCString()); - return false; + return; } if (set_pc) { ThreadList &thread_list = process->GetThreadList(); @@ -3171,9 +3147,7 @@ class CommandObjectTargetModulesLoad } else { result.AppendError("either the \"--file \" or the \"--uuid " "\" option must be specified.\n"); - return false; } - return result.Succeeded(); } OptionGroupOptions m_option_group; @@ -3245,7 +3219,7 @@ class CommandObjectTargetModulesList : public CommandObjectParsed { Options *GetOptions() override { return &m_options; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { Target *target = GetDebugger().GetSelectedTarget().get(); const bool use_global_module_list = m_options.m_use_global_module_list; // Define a local module list here to ensure it lives longer than any @@ -3255,7 +3229,7 @@ class CommandObjectTargetModulesList : public CommandObjectParsed { if (target == nullptr && !use_global_module_list) { result.AppendError("invalid target, create a debug target using the " "'target create' command"); - return false; + return; } else { if (target) { uint32_t addr_byte_size = @@ -3288,7 +3262,7 @@ class CommandObjectTargetModulesList : public CommandObjectParsed { result.AppendError( "Can only look up modules by address with a valid target."); } - return result.Succeeded(); + return; } size_t num_modules 
= 0;
@@ -3318,7 +3292,7 @@ class CommandObjectTargetModulesList : public CommandObjectParsed {
         if (argc == 1) {
           result.AppendErrorWithFormat("no modules found that match '%s'",
                                        arg.c_str());
-          return false;
+          return;
         }
       }
     }
@@ -3364,10 +3338,9 @@ class CommandObjectTargetModulesList : public CommandObjectParsed {
           result.AppendError(
               "the target has no associated executable images");
         }
-        return false;
+        return;
       }
     }
-    return result.Succeeded();
   }

   void PrintModule(Target *target, Module *module, int indent, Stream &strm) {
@@ -3601,7 +3574,7 @@ class CommandObjectTargetModulesShowUnwind : public CommandObjectParsed {
   Options *GetOptions() override { return &m_options; }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target *target = m_exe_ctx.GetTargetPtr();
     Process *process = m_exe_ctx.GetProcessPtr();
     ABI *abi = nullptr;
@@ -3611,19 +3584,19 @@ class CommandObjectTargetModulesShowUnwind : public CommandObjectParsed {
     if (process == nullptr) {
       result.AppendError(
           "You must have a process running to use this command.");
-      return false;
+      return;
     }

     ThreadList threads(process->GetThreadList());
     if (threads.GetSize() == 0) {
       result.AppendError("The process must be paused to use this command.");
-      return false;
+      return;
     }

     ThreadSP thread(threads.GetThreadAtIndex(0));
     if (!thread) {
       result.AppendError("The process must be paused to use this command.");
-      return false;
+      return;
     }

     SymbolContextList sc_list;
@@ -3650,13 +3623,13 @@ class CommandObjectTargetModulesShowUnwind : public CommandObjectParsed {
     } else {
       result.AppendError(
           "address-expression or function name option must be specified.");
-      return false;
+      return;
     }

     if (sc_list.GetSize() == 0) {
       result.AppendErrorWithFormat("no unwind data found that matches '%s'.",
                                    m_options.m_str.c_str());
-      return false;
+      return;
     }

     for (const SymbolContext &sc : sc_list) {
@@ -3855,7 +3828,6 @@ class CommandObjectTargetModulesShowUnwind : public CommandObjectParsed {

       result.GetOutputStream().Printf("\n");
     }
-    return result.Succeeded();
   }

   CommandOptions m_options;
@@ -4159,7 +4131,7 @@ class CommandObjectTargetModulesLookup : public CommandObjectParsed {
   }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target *target = &GetSelectedTarget();
     bool syntax_error = false;
     uint32_t i;
@@ -4180,7 +4152,7 @@ class CommandObjectTargetModulesLookup : public CommandObjectParsed {
           num_successful_lookups++;
           if (!m_options.m_print_all) {
             result.SetStatus(eReturnStatusSuccessFinishResult);
-            return result.Succeeded();
+            return;
           }
         }

@@ -4190,7 +4162,7 @@ class CommandObjectTargetModulesLookup : public CommandObjectParsed {
      std::lock_guard guard(target_modules.GetMutex());
      if (target_modules.GetSize() == 0) {
        result.AppendError("the target has no associated executable images");
-        return false;
+        return;
      }

      for (ModuleSP module_sp : target_modules.ModulesNoLocking()) {
@@ -4230,7 +4202,6 @@ class CommandObjectTargetModulesLookup : public CommandObjectParsed {
       result.SetStatus(eReturnStatusSuccessFinishResult);
     else
       result.SetStatus(eReturnStatusFailed);
-    return result.Succeeded();
   }

   CommandOptions m_options;
@@ -4679,7 +4650,7 @@ class CommandObjectTargetSymbolsAdd : public CommandObjectParsed {
     return true;
   }

-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     Target *target = m_exe_ctx.GetTargetPtr();
     result.SetStatus(eReturnStatusFailed);
     bool flush = false;
@@ -4764,7 +4735,6 @@ class CommandObjectTargetSymbolsAdd : public CommandObjectParsed {
       if (process)
         process->Flush();
     }
-    return result.Succeeded();
   }

   OptionGroupOptions m_option_group;
@@ -5066,7 +5036,7 @@ Filter Options:
     io_handler.SetIsDone(true);
   }

-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     m_stop_hook_sp.reset();

     Target &target = GetSelectedOrDummyTarget();
@@ -5163,7 +5133,7 @@ Filter Options:
         result.AppendErrorWithFormat("Couldn't add stop hook: %s",
                                      error.AsCString());
         target.UndoCreateStopHook(new_hook_sp->GetID());
-        return false;
+        return;
       }
     } else {
       m_stop_hook_sp = new_hook_sp;
@@ -5171,8 +5141,6 @@ Filter Options:
                                                   *this); // IOHandlerDelegate
     }
     result.SetStatus(eReturnStatusSuccessFinishNoResult);
-
-    return result.Succeeded();
   }

 private:
@@ -5209,14 +5177,14 @@ class CommandObjectTargetStopHookDelete : public CommandObjectParsed {
   }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target &target = GetSelectedOrDummyTarget();
     // FIXME: see if we can use the breakpoint id style parser?
     size_t num_args = command.GetArgumentCount();
     if (num_args == 0) {
       if (!m_interpreter.Confirm("Delete all stop hooks?", true)) {
         result.SetStatus(eReturnStatusFailed);
-        return false;
+        return;
       } else {
         target.RemoveAllStopHooks();
       }
@@ -5226,17 +5194,16 @@ class CommandObjectTargetStopHookDelete : public CommandObjectParsed {
         if (!llvm::to_integer(command.GetArgumentAtIndex(i), user_id)) {
           result.AppendErrorWithFormat("invalid stop hook id: \"%s\".\n",
                                        command.GetArgumentAtIndex(i));
-          return false;
+          return;
         }
         if (!target.RemoveStopHookByID(user_id)) {
           result.AppendErrorWithFormat("unknown stop hook id: \"%s\".\n",
                                        command.GetArgumentAtIndex(i));
-          return false;
+          return;
         }
       }
     }
     result.SetStatus(eReturnStatusSuccessFinishNoResult);
-    return result.Succeeded();
   }
 };

@@ -5266,7 +5233,7 @@ class CommandObjectTargetStopHookEnableDisable : public CommandObjectParsed {
   }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target &target = GetSelectedOrDummyTarget();
     // FIXME: see if we can use the breakpoint id style parser?
     size_t num_args = command.GetArgumentCount();
@@ -5280,18 +5247,17 @@ class CommandObjectTargetStopHookEnableDisable : public CommandObjectParsed {
         if (!llvm::to_integer(command.GetArgumentAtIndex(i), user_id)) {
           result.AppendErrorWithFormat("invalid stop hook id: \"%s\".\n",
                                        command.GetArgumentAtIndex(i));
-          return false;
+          return;
         }
         success = target.SetStopHookActiveStateByID(user_id, m_enable);
         if (!success) {
           result.AppendErrorWithFormat("unknown stop hook id: \"%s\".\n",
                                        command.GetArgumentAtIndex(i));
-          return false;
+          return;
         }
       }
     }
     result.SetStatus(eReturnStatusSuccessFinishNoResult);
-    return result.Succeeded();
   }

 private:
@@ -5311,7 +5277,7 @@ class CommandObjectTargetStopHookList : public CommandObjectParsed {
   ~CommandObjectTargetStopHookList() override = default;

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target &target = GetSelectedOrDummyTarget();

     size_t num_hooks = target.GetNumStopHooks();
@@ -5327,7 +5293,6 @@ class CommandObjectTargetStopHookList : public CommandObjectParsed {
       }
     }
     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return result.Succeeded();
   }
 };

@@ -5377,14 +5342,13 @@ class CommandObjectTargetDumpTypesystem : public CommandObjectParsed {
   ~CommandObjectTargetDumpTypesystem() override = default;

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     // Go over every scratch TypeSystem and dump to the command output.
     for (lldb::TypeSystemSP ts : GetSelectedTarget().GetScratchTypeSystems())
       if (ts)
         ts->Dump(result.GetOutputStream().AsRawOstream());

     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return result.Succeeded();
   }
 };

@@ -5403,11 +5367,10 @@ class CommandObjectTargetDumpSectionLoadList : public CommandObjectParsed {
   ~CommandObjectTargetDumpSectionLoadList() override = default;

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target &target = GetSelectedTarget();
     target.GetSectionLoadList().Dump(result.GetOutputStream(), &target);
     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return result.Succeeded();
   }
 };

diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp
index 64f3edcad5639d..a9f5a4f8a4fbd7 100644
--- a/lldb/source/Commands/CommandObjectThread.cpp
+++ b/lldb/source/Commands/CommandObjectThread.cpp
@@ -412,7 +412,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {
   Options *GetOptions() override { return &m_all_options; }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Process *process = m_exe_ctx.GetProcessPtr();
     bool synchronous_execution = m_interpreter.GetSynchronous();
@@ -424,7 +424,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {

       if (thread == nullptr) {
         result.AppendError("no selected thread in process");
-        return false;
+        return;
       }
     } else {
       const char *thread_idx_cstr = command.GetArgumentAtIndex(0);
@@ -433,7 +433,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {
       if (!llvm::to_integer(thread_idx_cstr, step_thread_idx)) {
         result.AppendErrorWithFormat("invalid thread index '%s'.\n",
                                      thread_idx_cstr);
-        return false;
+        return;
       }
       thread =
           process->GetThreadList().FindThreadByIndexID(step_thread_idx).get();
@@ -441,20 +441,20 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {
         result.AppendErrorWithFormat(
             "Thread index %u is out of range (valid values are 0 - %u).\n",
             step_thread_idx, num_threads);
-        return false;
+        return;
       }
     }

     if (m_step_type == eStepTypeScripted) {
       if (m_class_options.GetName().empty()) {
         result.AppendErrorWithFormat("empty class name for scripted step.");
-        return false;
+        return;
       } else if (!GetDebugger().GetScriptInterpreter()->CheckObjectExists(
                      m_class_options.GetName().c_str())) {
         result.AppendErrorWithFormat(
             "class for scripted step: \"%s\" does not exist.",
             m_class_options.GetName().c_str());
-        return false;
+        return;
       }
     }

@@ -462,7 +462,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {
         m_step_type != eStepTypeInto) {
       result.AppendErrorWithFormat(
           "end line option is only valid for step into");
-      return false;
+      return;
     }

     const bool abort_other_plans = false;
@@ -494,14 +494,14 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {
                 error)) {
           result.AppendErrorWithFormat("invalid end-line option: %s.",
                                        error.AsCString());
-          return false;
+          return;
         }
       } else if (m_options.m_end_line_is_block_end) {
         Status error;
         Block *block = frame->GetSymbolContext(eSymbolContextBlock).block;
         if (!block) {
           result.AppendErrorWithFormat("Could not find the current block.");
-          return false;
+          return;
         }

         AddressRange block_range;
@@ -510,7 +510,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {
         if (!block_range.GetBaseAddress().IsValid()) {
           result.AppendErrorWithFormat(
               "Could not find the current block address.");
-          return false;
+          return;
         }
         lldb::addr_t pc_offset_in_block =
             pc_address.GetFileAddress() -
@@ -569,7 +569,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {
           new_plan_status);
     } else {
       result.AppendError("step type is not supported");
-      return false;
+      return;
     }

     // If we got a new plan, then set it to be a controlling plan (User level
@@ -600,7 +600,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {

       if (!error.Success()) {
         result.AppendMessage(error.AsCString());
-        return false;
+        return;
       }

       // There is a race condition where this thread will return up the call
@@ -624,7 +624,6 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {
     } else {
       result.SetError(new_plan_status);
     }
-    return result.Succeeded();
   }

   StepType m_step_type;
@@ -672,13 +671,13 @@ class CommandObjectThreadContinue : public CommandObjectParsed {
                  nullptr);
   }

-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     bool synchronous_execution = m_interpreter.GetSynchronous();

     Process *process = m_exe_ctx.GetProcessPtr();
     if (process == nullptr) {
       result.AppendError("no process exists. Cannot continue");
-      return false;
+      return;
     }

     StateType state = process->GetState();
@@ -698,7 +697,7 @@ class CommandObjectThreadContinue : public CommandObjectParsed {
           if (entry.ref().getAsInteger(0, thread_idx)) {
             result.AppendErrorWithFormat(
                 "invalid thread index argument: \"%s\".\n", entry.c_str());
-            return false;
+            return;
           }
           Thread *thread =
               process->GetThreadList().FindThreadByIndexID(thread_idx).get();
@@ -708,13 +707,13 @@ class CommandObjectThreadContinue : public CommandObjectParsed {
           } else {
             result.AppendErrorWithFormat("invalid thread index %u.\n",
                                          thread_idx);
-            return false;
+            return;
           }
         }

         if (resume_threads.empty()) {
           result.AppendError("no valid thread indexes were specified");
-          return false;
+          return;
         } else {
           if (resume_threads.size() == 1)
             result.AppendMessageWithFormat("Resuming thread: ");
@@ -753,7 +752,7 @@ class CommandObjectThreadContinue : public CommandObjectParsed {
         Thread *current_thread = GetDefaultThread();
         if (current_thread == nullptr) {
           result.AppendError("the process doesn't have a current thread");
-          return false;
+          return;
         }
         // Set the actions that the threads should each take when resuming
         for (uint32_t idx = 0; idx < num_threads; ++idx) {
@@ -801,8 +800,6 @@ class CommandObjectThreadContinue : public CommandObjectParsed {
           "Process cannot be continued from its current state (%s).\n",
           StateAsCString(state));
     }
-
-    return result.Succeeded();
   }
 };

@@ -920,7 +917,7 @@ class CommandObjectThreadUntil : public CommandObjectParsed {
   Options *GetOptions() override { return &m_options; }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     bool synchronous_execution = m_interpreter.GetSynchronous();

     Target *target = &GetSelectedTarget();
@@ -939,14 +936,14 @@ class CommandObjectThreadUntil : public CommandObjectParsed {
         if (!llvm::to_integer(command.GetArgumentAtIndex(i), line_number)) {
           result.AppendErrorWithFormat("invalid line number: '%s'.\n",
                                        command.GetArgumentAtIndex(i));
-          return false;
+          return;
         } else
           line_numbers.push_back(line_number);
       }
     } else if (m_options.m_until_addrs.empty()) {
       result.AppendErrorWithFormat("No line number or address provided:\n%s",
                                    GetSyntax().str().c_str());
-      return false;
+      return;
     }

     if (m_options.m_thread_idx == LLDB_INVALID_THREAD_ID) {
@@ -962,7 +959,7 @@ class CommandObjectThreadUntil : public CommandObjectParsed {
       result.AppendErrorWithFormat(
           "Thread index %u is out of range (valid values are 0 - %u).\n",
           m_options.m_thread_idx, num_threads);
-      return false;
+      return;
     }

     const bool abort_other_plans = false;
@@ -973,7 +970,7 @@ class CommandObjectThreadUntil : public CommandObjectParsed {
       result.AppendErrorWithFormat(
           "Frame index %u is out of range for thread id %" PRIu64 ".\n",
           m_options.m_frame_idx, thread->GetID());
-      return false;
+      return;
     }

     ThreadPlanSP new_plan_sp;
@@ -991,7 +988,7 @@ class CommandObjectThreadUntil : public CommandObjectParsed {
         result.AppendErrorWithFormat("Failed to resolve the line table for "
                                      "frame %u of thread id %" PRIu64 ".\n",
                                      m_options.m_frame_idx, thread->GetID());
-        return false;
+        return;
       }

       LineEntry function_start;
@@ -1003,7 +1000,7 @@ class CommandObjectThreadUntil : public CommandObjectParsed {
       if (!sc.function) {
         result.AppendErrorWithFormat("Have debug information but no "
                                      "function info - can't get until range.");
-        return false;
+        return;
       }

       AddressRange fun_addr_range = sc.function->GetAddressRange();
@@ -1067,7 +1064,7 @@ class CommandObjectThreadUntil : public CommandObjectParsed {
         result.AppendErrorWithFormat(
             "Until target outside of the current function.\n");
-        return false;
+        return;
       }

       new_plan_sp = thread->QueueThreadPlanForStepUntil(
@@ -1083,20 +1080,20 @@ class CommandObjectThreadUntil : public CommandObjectParsed {
         new_plan_sp->SetOkayToDiscard(false);
       } else {
         result.SetError(new_plan_status);
-        return false;
+        return;
       }
     } else {
       result.AppendErrorWithFormat("Frame index %u of thread id %" PRIu64
                                    " has no debug information.\n",
                                    m_options.m_frame_idx, thread->GetID());
-      return false;
+      return;
     }

     if (!process->GetThreadList().SetSelectedThreadByID(thread->GetID())) {
       result.AppendErrorWithFormat(
           "Failed to set the selected thread to thread id %" PRIu64 ".\n",
           thread->GetID());
-      return false;
+      return;
     }

     StreamString stream;
@@ -1125,7 +1122,6 @@ class CommandObjectThreadUntil : public CommandObjectParsed {
                                      error.AsCString());
       }
     }
-    return result.Succeeded();
   }

   CommandOptions m_options;
@@ -1170,23 +1166,23 @@ class CommandObjectThreadSelect : public CommandObjectParsed {
   }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Process *process = m_exe_ctx.GetProcessPtr();
     if (process == nullptr) {
       result.AppendError("no process");
-      return false;
+      return;
     } else if (command.GetArgumentCount() != 1) {
       result.AppendErrorWithFormat(
           "'%s' takes exactly one thread index argument:\nUsage: %s\n",
           m_cmd_name.c_str(), m_cmd_syntax.c_str());
-      return false;
+      return;
     }

     uint32_t index_id;
     if (!llvm::to_integer(command.GetArgumentAtIndex(0), index_id)) {
       result.AppendErrorWithFormat("Invalid thread index '%s'",
                                    command.GetArgumentAtIndex(0));
-      return false;
+      return;
     }

     Thread *new_thread =
@@ -1194,13 +1190,11 @@ class CommandObjectThreadSelect : public CommandObjectParsed {
     if (new_thread == nullptr) {
       result.AppendErrorWithFormat("invalid thread #%s.\n",
                                    command.GetArgumentAtIndex(0));
-      return false;
+      return;
     }

     process->GetThreadList().SetSelectedThreadByID(new_thread->GetID(), true);
     result.SetStatus(eReturnStatusSuccessFinishNoResult);
-
-    return result.Succeeded();
   }
 };

@@ -1221,7 +1215,7 @@ class CommandObjectThreadList : public CommandObjectParsed {
   ~CommandObjectThreadList() override = default;

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Stream &strm = result.GetOutputStream();
     result.SetStatus(eReturnStatusSuccessFinishNoResult);
     Process *process = m_exe_ctx.GetProcessPtr();
@@ -1232,7 +1226,6 @@ class CommandObjectThreadList : public CommandObjectParsed {
     process->GetStatus(strm);
     process->GetThreadStatus(strm, only_threads_with_stop_reason, start_frame,
                              num_frames, num_frames_with_source, false);
-    return result.Succeeded();
   }
 };

@@ -1511,7 +1504,7 @@ class CommandObjectThreadReturn : public CommandObjectRaw {
   Options *GetOptions() override { return &m_options; }

 protected:
-  bool DoExecute(llvm::StringRef command,
+  void DoExecute(llvm::StringRef command,
                  CommandReturnObject &result) override {
     // I am going to handle this by hand, because I don't want you to have to
     // say:
@@ -1539,7 +1532,7 @@ class CommandObjectThreadReturn : public CommandObjectRaw {
               "Could not select 0th frame after unwinding expression.");
         }
       }
-      return result.Succeeded();
+      return;
     }

     ValueObjectSP return_valobj_sp;
@@ -1549,7 +1542,7 @@ class CommandObjectThreadReturn : public CommandObjectRaw {

     if (frame_sp->IsInlined()) {
       result.AppendError("Don't know how to return from inlined frames.");
-      return false;
+      return;
     }

     if (!command.empty()) {
@@ -1570,7 +1563,7 @@ class CommandObjectThreadReturn : public CommandObjectRaw {
         else
           result.AppendErrorWithFormat(
               "Unknown error evaluating result expression.");
-        return false;
+        return;
       }
     }

@@ -1582,11 +1575,10 @@ class CommandObjectThreadReturn : public CommandObjectRaw {
       result.AppendErrorWithFormat(
           "Error returning from frame %d of thread %d: %s.", frame_idx,
           thread_sp->GetIndexID(), error.AsCString());
-      return false;
+      return;
     }

     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return true;
   }

   CommandOptions m_options;
@@ -1667,7 +1659,7 @@ class CommandObjectThreadJump : public CommandObjectParsed {
   Options *GetOptions() override { return &m_options; }

 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     RegisterContext *reg_ctx = m_exe_ctx.GetRegisterContext();
     StackFrame *frame = m_exe_ctx.GetFramePtr();
     Thread *thread = m_exe_ctx.GetThreadPtr();
@@ -1682,13 +1674,13 @@ class CommandObjectThreadJump : public CommandObjectParsed {
       lldb::addr_t callAddr = dest.GetCallableLoadAddress(target);
       if (callAddr == LLDB_INVALID_ADDRESS) {
         result.AppendErrorWithFormat("Invalid destination address.");
-        return false;
+        return;
       }

       if (!reg_ctx->SetPC(callAddr)) {
         result.AppendErrorWithFormat("Error changing PC value for thread %d.",
                                      thread->GetIndexID());
-        return false;
+        return;
       }
     } else {
       // Pick either the absolute line, or work out a relative one.
@@ -1704,7 +1696,7 @@ class CommandObjectThreadJump : public CommandObjectParsed {
       if (!file) {
         result.AppendErrorWithFormat(
             "No source file available for the current location.");
-        return false;
+        return;
       }

       std::string warnings;
@@ -1712,7 +1704,7 @@ class CommandObjectThreadJump : public CommandObjectParsed {

       if (err.Fail()) {
         result.SetError(err);
-        return false;
+        return;
       }

       if (!warnings.empty())
@@ -1720,7 +1712,6 @@ class CommandObjectThreadJump : public CommandObjectParsed {
     }

     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return true;
   }

   CommandOptions m_options;
@@ -1804,7 +1795,7 @@ class CommandObjectThreadPlanList : public CommandObjectIterateOverThreads {

   Options *GetOptions() override { return &m_options; }

-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     // If we are reporting all threads, dispatch to the Process to do that:
     if (command.GetArgumentCount() == 0 && m_options.m_tids.empty()) {
       Stream &strm = result.GetOutputStream();
@@ -1814,7 +1805,7 @@ class CommandObjectThreadPlanList : public CommandObjectIterateOverThreads {
       m_exe_ctx.GetProcessPtr()->DumpThreadPlans(
          strm, desc_level, m_options.m_internal, true, m_options.m_unreported);
       result.SetStatus(eReturnStatusSuccessFinishResult);
-      return true;
+      return;
     } else {
       // Do any TID's that the user may have specified as TID, then do any
       // Thread Indexes...
@@ -1829,7 +1820,7 @@ class CommandObjectThreadPlanList : public CommandObjectIterateOverThreads {
         if (!success) {
           result.AppendError("Error dumping plans:");
           result.AppendError(tmp_strm.GetString());
-          return false;
+          return;
         }
         // Otherwise, add our data to the output:
         result.GetOutputStream() << tmp_strm.GetString();
@@ -1899,13 +1890,13 @@ class CommandObjectThreadPlanDiscard : public CommandObjectParsed {
     m_exe_ctx.GetThreadPtr()->AutoCompleteThreadPlans(request);
   }

-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     Thread *thread = m_exe_ctx.GetThreadPtr();
     if (args.GetArgumentCount() != 1) {
       result.AppendErrorWithFormat("Too many arguments, expected one - the "
                                    "thread plan index - but got %zu.",
                                    args.GetArgumentCount());
-      return false;
+      return;
     }

     uint32_t thread_plan_idx;
@@ -1913,23 +1904,21 @@ class CommandObjectThreadPlanDiscard : public CommandObjectParsed {
       result.AppendErrorWithFormat(
           "Invalid thread index: \"%s\" - should be unsigned int.",
           args.GetArgumentAtIndex(0));
-      return false;
+      return;
     }

     if (thread_plan_idx == 0) {
       result.AppendErrorWithFormat(
           "You wouldn't really want me to discard the base thread plan.");
-      return false;
+      return;
     }

     if (thread->DiscardUserThreadPlansUpToIndex(thread_plan_idx)) {
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
-      return true;
     } else {
       result.AppendErrorWithFormat(
           "Could not find User thread plan with index %s.",
           args.GetArgumentAtIndex(0));
-      return false;
     }
   }
 };

@@ -1965,13 +1954,13 @@ class CommandObjectThreadPlanPrune : public CommandObjectParsed {

   ~CommandObjectThreadPlanPrune() override = default;

-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     Process *process = m_exe_ctx.GetProcessPtr();

     if (args.GetArgumentCount() == 0) {
       process->PruneThreadPlans();
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
-      return true;
+      return;
     }

     const size_t num_args = args.GetArgumentCount();
@@ -1984,16 +1973,15 @@ class CommandObjectThreadPlanPrune : public CommandObjectParsed {
       if (!llvm::to_integer(args.GetArgumentAtIndex(i), tid)) {
         result.AppendErrorWithFormat("invalid thread specification: \"%s\"\n",
                                      args.GetArgumentAtIndex(i));
-        return false;
+        return;
       }
       if (!process->PruneThreadPlansForTID(tid)) {
         result.AppendErrorWithFormat("Could not find unreported tid: \"%s\"\n",
                                      args.GetArgumentAtIndex(i));
-        return false;
+        return;
       }
     }
     result.SetStatus(eReturnStatusSuccessFinishNoResult);
-    return true;
   }
 };

@@ -2187,11 +2175,11 @@ class CommandObjectTraceDumpFunctionCalls : public CommandObjectParsed {
   Options *GetOptions() override { return &m_options; }

 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     ThreadSP thread_sp = GetSingleThreadFromArgs(m_exe_ctx, args, result);
     if (!thread_sp) {
       result.AppendError("invalid thread\n");
-      return false;
+      return;
     }

     llvm::Expected<TraceCursorSP> cursor_or_error =
@@ -2199,7 +2187,7 @@ class CommandObjectTraceDumpFunctionCalls : public CommandObjectParsed {

     if (!cursor_or_error) {
       result.AppendError(llvm::toString(cursor_or_error.takeError()));
-      return false;
+      return;
     }
     TraceCursorSP &cursor_sp = *cursor_or_error;

@@ -2217,7 +2205,6 @@ class CommandObjectTraceDumpFunctionCalls : public CommandObjectParsed {
                                m_options.m_dumper_options);

     dumper.DumpFunctionCalls();
-    return true;
   }

   CommandOptions m_options;
@@ -2371,11 +2358,11 @@ class CommandObjectTraceDumpInstructions : public CommandObjectParsed {
   }

 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override {
+  void DoExecute(Args &args, CommandReturnObject &result) override {
     ThreadSP thread_sp = GetSingleThreadFromArgs(m_exe_ctx, args, result);
     if (!thread_sp) {
       result.AppendError("invalid thread\n");
-      return false;
+      return;
     }

     if (m_options.m_continue && m_last_id) {
@@ -2390,14 +2377,14 @@ class CommandObjectTraceDumpInstructions : public CommandObjectParsed {

     if (!cursor_or_error) {
       result.AppendError(llvm::toString(cursor_or_error.takeError()));
-      return false;
+      return;
     }
     TraceCursorSP &cursor_sp = *cursor_or_error;

     if (m_options.m_dumper_options.id &&
         !cursor_sp->HasId(*m_options.m_dumper_options.id)) {
       result.AppendError("invalid instruction id\n");
-      return false;
+      return;
     }

     std::optional out_file;
@@ -2419,7 +2406,6 @@ class CommandObjectTraceDumpInstructions : public CommandObjectParsed {
                               m_options.m_dumper_options);

     m_last_id = dumper.DumpInstructions(m_options.m_count);
-    return true;
   }

   CommandOptions m_options;

diff --git a/lldb/source/Commands/CommandObjectThreadUtil.cpp b/lldb/source/Commands/CommandObjectThreadUtil.cpp
index 504d5fa0118d4c..d7fa4190a24509 100644
--- a/lldb/source/Commands/CommandObjectThreadUtil.cpp
+++ b/lldb/source/Commands/CommandObjectThreadUtil.cpp
@@ -34,16 +34,16 @@ CommandObjectMultipleThreads::CommandObjectMultipleThreads(
   m_arguments.push_back({thread_arg});
 }

-bool CommandObjectIterateOverThreads::DoExecute(Args &command,
+void CommandObjectIterateOverThreads::DoExecute(Args &command,
                                                 CommandReturnObject &result) {
   result.SetStatus(m_success_return);

   bool all_threads = false;
   if (command.GetArgumentCount() == 0) {
     Thread *thread = m_exe_ctx.GetThreadPtr();
-    if (!thread || !HandleOneThread(thread->GetID(), result))
-      return false;
-    return result.Succeeded();
+    if (thread)
+      HandleOneThread(thread->GetID(), result);
+    return;
   } else if (command.GetArgumentCount() == 1) {
     all_threads = ::strcmp(command.GetArgumentAtIndex(0), "all") == 0;
     m_unique_stacks = ::strcmp(command.GetArgumentAtIndex(0), "unique") == 0;
@@ -71,7 +71,7 @@ bool CommandObjectIterateOverThreads::DoExecute(Args &command,
       if (!llvm::to_integer(command.GetArgumentAtIndex(i), thread_idx)) {
         result.AppendErrorWithFormat("invalid thread specification: \"%s\"\n",
                                      command.GetArgumentAtIndex(i));
-        return false;
+        return;
       }

       ThreadSP thread =
@@ -80,7 +80,7 @@ bool CommandObjectIterateOverThreads::DoExecute(Args &command,
       if (!thread) {
         result.AppendErrorWithFormat("no thread with index: \"%s\"\n",
                                      command.GetArgumentAtIndex(i));
-        return false;
+        return;
       }

       tids.push_back(thread->GetID());
@@ -92,7 +92,7 @@ bool CommandObjectIterateOverThreads::DoExecute(Args &command,
     std::set unique_stacks;
     for (const lldb::tid_t &tid : tids) {
       if (!BucketThread(tid, unique_stacks, result)) {
-        return false;
+        return;
       }
     }

@@ -114,7 +114,7 @@ bool CommandObjectIterateOverThreads::DoExecute(Args &command,
       ThreadSP thread = process->GetThreadList().FindThreadByIndexID(
          representative_thread_id);
       if (!HandleOneThread(thread->GetID(), result)) {
-        return false;
+        return;
       }
     }
   } else {
@@ -124,12 +124,11 @@ bool CommandObjectIterateOverThreads::DoExecute(Args &command,
        result.AppendMessage("");

      if (!HandleOneThread(tid, result))
-        return false;
+        return;

      ++idx;
    }
  }
-  return result.Succeeded();
 }

 bool CommandObjectIterateOverThreads::BucketThread(
@@ -167,7 +166,7 @@ bool CommandObjectIterateOverThreads::BucketThread(
   return true;
 }

-bool CommandObjectMultipleThreads::DoExecute(Args &command,
+void CommandObjectMultipleThreads::DoExecute(Args &command,
                                              CommandReturnObject &result) {
   Process &process = m_exe_ctx.GetProcessRef();
@@ -191,7 +190,7 @@ bool CommandObjectMultipleThreads::DoExecute(Args &command,
       if (!llvm::to_integer(command.GetArgumentAtIndex(i), thread_idx)) {
         result.AppendErrorWithFormat("invalid thread specification: \"%s\"\n",
                                      command.GetArgumentAtIndex(i));
-        return false;
+        return;
       }

       ThreadSP thread = process.GetThreadList().FindThreadByIndexID(thread_idx);
@@ -199,12 +198,12 @@ bool CommandObjectMultipleThreads::DoExecute(Args &command,
       if (!thread) {
         result.AppendErrorWithFormat("no thread with index: \"%s\"\n",
                                      command.GetArgumentAtIndex(i));
-        return false;
+        return;
       }

       tids.push_back(thread->GetID());
     }
   }

-  return DoExecuteOnThreads(command, result, tids);
+  DoExecuteOnThreads(command, result, tids);
 }

diff --git a/lldb/source/Commands/CommandObjectThreadUtil.h b/lldb/source/Commands/CommandObjectThreadUtil.h
index c8f51eabc043fe..74d1136bab7f12 100644
--- a/lldb/source/Commands/CommandObjectThreadUtil.h
+++ b/lldb/source/Commands/CommandObjectThreadUtil.h
@@ -54,7 +54,7 @@ class CommandObjectIterateOverThreads : public CommandObjectParsed {

   ~CommandObjectIterateOverThreads() override = default;

-  bool DoExecute(Args &command, CommandReturnObject &result) override;
+  void DoExecute(Args &command, CommandReturnObject &result) override;

 protected:
   // Override this to do whatever you need to do for one thread.
@@ -84,7 +84,7 @@ class CommandObjectMultipleThreads : public CommandObjectParsed {
                                const char *name, const char *help,
                                const char *syntax, uint32_t flags);

-  bool DoExecute(Args &command, CommandReturnObject &result) override;
+  void DoExecute(Args &command, CommandReturnObject &result) override;

 protected:
   /// Method that handles the command after the main arguments have been parsed.
diff --git a/lldb/source/Commands/CommandObjectTrace.cpp b/lldb/source/Commands/CommandObjectTrace.cpp
index 52fb56ffc1fb73..e0c74e29aaa6bc 100644
--- a/lldb/source/Commands/CommandObjectTrace.cpp
+++ b/lldb/source/Commands/CommandObjectTrace.cpp
@@ -103,11 +103,11 @@ class CommandObjectTraceSave : public CommandObjectParsed {
   ~CommandObjectTraceSave() override = default;

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     if (command.size() != 1) {
       result.AppendError("a single path to a directory where the trace bundle "
                          "will be created is required");
-      return false;
+      return;
     }

     FileSpec bundle_dir(command[0].ref());
@@ -125,8 +125,6 @@ class CommandObjectTraceSave : public CommandObjectParsed {
     } else {
       result.AppendError(toString(desc_file.takeError()));
     }
-
-    return result.Succeeded();
   }

   CommandOptions m_options;
@@ -194,11 +192,11 @@ class CommandObjectTraceLoad : public CommandObjectParsed {
   Options *GetOptions() override { return &m_options; }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     if (command.size() != 1) {
       result.AppendError("a single path to a JSON file containing a the "
                          "description of the trace bundle is required");
-      return false;
+      return;
     }

     const FileSpec trace_description_file(command[0].ref());
@@ -210,7 +208,7 @@ class CommandObjectTraceLoad : public CommandObjectParsed {
     if (!trace_or_err) {
       result.AppendErrorWithFormat(
           "%s\n", llvm::toString(trace_or_err.takeError()).c_str());
-      return false;
+      return;
     }

     if (m_options.m_verbose) {
@@ -219,7 +217,6 @@ class CommandObjectTraceLoad : public CommandObjectParsed {
     }

     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return true;
   }

   CommandOptions m_options;
@@ -276,7 +273,7 @@ class CommandObjectTraceDump : public CommandObjectParsed {
   Options *GetOptions() override { return &m_options; }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Status error;
     // TODO: fill in the dumping code here!
     if (error.Success()) {
@@ -284,7 +281,6 @@ class CommandObjectTraceDump : public CommandObjectParsed {
     } else {
       result.AppendErrorWithFormat("%s\n", error.AsCString());
     }
-    return result.Succeeded();
   }

   CommandOptions m_options;
@@ -345,12 +341,12 @@ class CommandObjectTraceSchema : public CommandObjectParsed {
   Options *GetOptions() override { return &m_options; }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Status error;
     if (command.empty()) {
       result.AppendError(
           "trace schema cannot be invoked without a plug-in as argument");
-      return false;
+      return;
     }

     StringRef plugin_name(command[0].c_str());
@@ -376,7 +372,6 @@ class CommandObjectTraceSchema : public CommandObjectParsed {
     } else {
       result.AppendErrorWithFormat("%s\n", error.AsCString());
     }
-    return result.Succeeded();
   }

   CommandOptions m_options;

diff --git a/lldb/source/Commands/CommandObjectType.cpp b/lldb/source/Commands/CommandObjectType.cpp
index 2969f82f95882e..411dc2fb723cea 100644
--- a/lldb/source/Commands/CommandObjectType.cpp
+++ b/lldb/source/Commands/CommandObjectType.cpp
@@ -276,7 +276,7 @@ class CommandObjectTypeSummaryAdd : public CommandObjectParsed,
                           Status *error = nullptr);

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override;
+  void DoExecute(Args &command, CommandReturnObject &result) override;
 };

 static const char *g_synth_addreader_instructions =
@@ -389,18 +389,17 @@ class CommandObjectTypeSynthAdd : public CommandObjectParsed,
   bool Execute_PythonClass(Args &command, CommandReturnObject &result);

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     WarnOnPotentialUnquotedUnsignedType(command, result);

     if (m_options.handwrite_python)
-      return Execute_HandwritePython(command, result);
+      Execute_HandwritePython(command, result);
     else if (m_options.is_class_based)
-      return Execute_PythonClass(command, result);
+      Execute_PythonClass(command, result);
     else {
       result.AppendError("must either provide a children list, a Python class "
                          "name, or use -P and type a Python class "
                          "line-by-line");
-      return false;
     }
   }

@@ -649,13 +648,13 @@ pointers to floats. Nor will it change the default display for Afloat and Bfloa
   ~CommandObjectTypeFormatAdd() override = default;

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     const size_t argc = command.GetArgumentCount();

     if (argc < 1) {
       result.AppendErrorWithFormat("%s takes one or more args.\n",
                                    m_cmd_name.c_str());
-      return false;
+      return;
     }

     const Format format = m_format_options.GetFormat();
@@ -663,7 +662,7 @@ pointers to floats. Nor will it change the default display for Afloat and Bfloa
         m_command_options.m_custom_type_name.empty()) {
       result.AppendErrorWithFormat("%s needs a valid format.\n",
                                    m_cmd_name.c_str());
-      return false;
+      return;
     }

     TypeFormatImplSP entry;
@@ -688,14 +687,14 @@ pointers to floats. Nor will it change the default display for Afloat and Bfloa
     DataVisualization::Categories::GetCategory(
         ConstString(m_command_options.m_category), category_sp);
     if (!category_sp)
-      return false;
+      return;

     WarnOnPotentialUnquotedUnsignedType(command, result);

     for (auto &arg_entry : command.entries()) {
       if (arg_entry.ref().empty()) {
         result.AppendError("empty typenames not allowed");
-        return false;
+        return;
       }

       FormatterMatchType match_type = eFormatterMatchExact;
@@ -705,14 +704,13 @@ pointers to floats. Nor will it change the default display for Afloat and Bfloa
         if (!typeRX.IsValid()) {
           result.AppendError(
               "regex format error (maybe this is not really a regex?)");
-          return false;
+          return;
         }
       }
       category_sp->AddTypeFormat(arg_entry.ref(), match_type, entry);
     }

     result.SetStatus(eReturnStatusSuccessFinishNoResult);
-    return result.Succeeded();
   }
 };

@@ -828,12 +826,12 @@ class CommandObjectTypeFormatterDelete : public CommandObjectParsed {
 protected:
   virtual bool FormatterSpecificDeletion(ConstString typeCS) { return false; }

-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     const size_t argc = command.GetArgumentCount();

     if (argc != 1) {
       result.AppendErrorWithFormat("%s takes 1 arg.\n", m_cmd_name.c_str());
-      return false;
+      return;
     }

     const char *typeA = command.GetArgumentAtIndex(0);
@@ -841,7 +839,7 @@ class CommandObjectTypeFormatterDelete : public CommandObjectParsed {

     if (!typeCS) {
       result.AppendError("empty typenames not allowed");
-      return false;
+      return;
     }

     if (m_options.m_delete_all) {
@@ -851,7 +849,7 @@ class CommandObjectTypeFormatterDelete : public CommandObjectParsed {
             return true;
           });
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
-      return result.Succeeded();
+      return;
     }

     bool delete_category = false;
@@ -875,10 +873,8 @@ class CommandObjectTypeFormatterDelete : public CommandObjectParsed {

     if (delete_category || extra_deletion) {
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
-      return result.Succeeded();
     } else {
       result.AppendErrorWithFormat("no custom formatter for %s.\n", typeA);
-      return false;
     }
   }
 };

@@ -942,7 +938,7 @@ class CommandObjectTypeFormatterClear : public CommandObjectParsed {
 protected:
   virtual void FormatterSpecificDeletion() {}

-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     if (m_options.m_delete_all) {
       DataVisualization::Categories::ForEach(
           [this](const TypeCategoryImplSP &category_sp) -> bool {
@@ -965,7 +961,6 @@ class CommandObjectTypeFormatterClear : public CommandObjectParsed {
     FormatterSpecificDeletion();

     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return result.Succeeded();
   }
 };

@@ -1077,7 +1072,7 @@ class CommandObjectTypeFormatterList : public CommandObjectParsed {
     return regex == nullptr || s == regex->GetText() || regex->Execute(s);
   }

-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     const size_t argc = command.GetArgumentCount();

     std::unique_ptr category_regex;
@@ -1090,7 +1085,7 @@ class CommandObjectTypeFormatterList : public CommandObjectParsed {
         result.AppendErrorWithFormat(
             "syntax error in category regular expression '%s'",
             m_options.m_category_regex.GetCurrentValueAsRef().str().c_str());
-        return false;
+        return;
       }
     }

@@ -1100,7 +1095,7 @@ class CommandObjectTypeFormatterList : public CommandObjectParsed {
      if (!formatter_regex->IsValid()) {
        result.AppendErrorWithFormat("syntax error in regular expression '%s'",
                                     arg);
-        return false;
+        return;
      }
    }

@@ -1154,7 +1149,6 @@ class CommandObjectTypeFormatterList : public CommandObjectParsed {
       result.GetOutputStream().PutCString("no matching results found.\n");
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
     }
-    return result.Succeeded();
   }
 };

@@ -1557,20 +1551,20 @@ Alternatively, the -o option can be used when providing a simple one-line Python
 (lldb) type summary add JustADemo -o "value = valobj.GetChildMemberWithName('value'); return 'My value is ' + value.GetValue();")");
 }

-bool CommandObjectTypeSummaryAdd::DoExecute(Args &command,
+void CommandObjectTypeSummaryAdd::DoExecute(Args &command,
                                             CommandReturnObject &result) {
   WarnOnPotentialUnquotedUnsignedType(command, result);

   if (m_options.m_is_add_script) {
 #if LLDB_ENABLE_PYTHON
-    return Execute_ScriptSummary(command, result);
+    Execute_ScriptSummary(command, result);
 #else
     result.AppendError("python is disabled");
-    return false;
 #endif
+    return;
   }

-  return Execute_StringSummary(command, result);
+  Execute_StringSummary(command, result);
 }

 static bool FixArrayTypeNameWithRegex(ConstString &type_name) {
@@ -1773,13 +1767,13 @@ class CommandObjectTypeCategoryDefine : public CommandObjectParsed {
   }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     const size_t argc = command.GetArgumentCount();

     if (argc < 1) {
       result.AppendErrorWithFormat("%s takes 1 or more args.\n",
                                    m_cmd_name.c_str());
-      return false;
+      return;
     }

     for (auto &entry : command.entries()) {
@@ -1795,7 +1789,6 @@ class CommandObjectTypeCategoryDefine : public CommandObjectParsed {
     }

     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return result.Succeeded();
   }
 };

@@ -1875,13 +1868,13 @@ class CommandObjectTypeCategoryEnable : public CommandObjectParsed {
   }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     const size_t argc = command.GetArgumentCount();

     if (argc < 1 && m_options.m_language == lldb::eLanguageTypeUnknown) {
       result.AppendErrorWithFormat("%s takes arguments and/or a language",
                                    m_cmd_name.c_str());
-      return false;
+      return;
     }

     if (argc == 1 && strcmp(command.GetArgumentAtIndex(0), "*") == 0) {
@@ -1893,7 +1886,7 @@ class CommandObjectTypeCategoryEnable : public CommandObjectParsed {

         if (!typeCS) {
           result.AppendError("empty category name not allowed");
-          return false;
+          return;
         }
         DataVisualization::Categories::Enable(typeCS);
         lldb::TypeCategoryImplSP cate;
@@ -1909,7 +1902,6 @@ class CommandObjectTypeCategoryEnable : public CommandObjectParsed {
       DataVisualization::Categories::Enable(m_options.m_language);

     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return result.Succeeded();
   }
 };

@@ -1943,13 +1935,13 @@ class CommandObjectTypeCategoryDelete : public CommandObjectParsed {
   }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     const size_t argc = command.GetArgumentCount();

     if (argc < 1) {
       result.AppendErrorWithFormat("%s takes 1 or more arg.\n",
                                    m_cmd_name.c_str());
-      return false;
+      return;
     }

     bool success = true;
@@ -1961,17 +1953,15 @@ class CommandObjectTypeCategoryDelete : public CommandObjectParsed {

       if (!typeCS) {
         result.AppendError("empty category name not allowed");
-        return false;
+        return;
       }
       if (!DataVisualization::Categories::Delete(typeCS))
         success = false; // keep deleting even if we hit an error
     }

     if (success) {
       result.SetStatus(eReturnStatusSuccessFinishResult);
-      return result.Succeeded();
     } else {
       result.AppendError("cannot delete one or more categories\n");
-      return false;
     }
   }
 };

@@ -2052,13 +2042,13 @@ class CommandObjectTypeCategoryDisable : public CommandObjectParsed {
   }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     const size_t argc = command.GetArgumentCount();

     if (argc < 1 && m_options.m_language == lldb::eLanguageTypeUnknown) {
       result.AppendErrorWithFormat("%s takes arguments and/or a language",
                                    m_cmd_name.c_str());
-      return false;
+      return;
     }

     if (argc == 1 && strcmp(command.GetArgumentAtIndex(0), "*") == 0) {
@@ -2071,7 +2061,7 @@ class CommandObjectTypeCategoryDisable : public CommandObjectParsed {

         if (!typeCS) {
           result.AppendError("empty category name not allowed");
-          return false;
+          return;
         }
         DataVisualization::Categories::Disable(typeCS);
       }
@@ -2081,7 +2071,6 @@ class CommandObjectTypeCategoryDisable : public CommandObjectParsed {
       DataVisualization::Categories::Disable(m_options.m_language);

     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return result.Succeeded();
   }
 };

@@ -2117,7 +2106,7 @@ class CommandObjectTypeCategoryList : public CommandObjectParsed {
   }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     const size_t argc = command.GetArgumentCount();

     std::unique_ptr regex;
@@ -2128,12 +2117,12 @@ class CommandObjectTypeCategoryList : public CommandObjectParsed {
       if (!regex->IsValid()) {
         result.AppendErrorWithFormat(
             "syntax error in category regular expression '%s'", arg);
-        return false;
+        return;
       }
     } else if (argc != 0) {
       result.AppendErrorWithFormat("%s takes 0 or one arg.\n",
                                    m_cmd_name.c_str());
-      return false;
+      return;
     }

     DataVisualization::Categories::ForEach(
@@ -2157,7 +2146,6 @@ class CommandObjectTypeCategoryList : public CommandObjectParsed {
         });

     result.SetStatus(eReturnStatusSuccessFinishResult);
-    return result.Succeeded();
   }
 };

@@ -2570,19 +2558,19 @@ all children of my_foo as if no filter was defined:"
   ~CommandObjectTypeFilterAdd() override = default;

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     const size_t argc = command.GetArgumentCount();

     if (argc < 1) {
       result.AppendErrorWithFormat("%s takes one or more args.\n",
                                    m_cmd_name.c_str());
-      return false;
+      return;
     }

     if (m_options.m_expr_paths.empty()) {
       result.AppendErrorWithFormat("%s needs one or more children.\n",
                                    m_cmd_name.c_str());
-      return false;
+      return;
     }

     TypeFilterImplSP entry(new TypeFilterImpl(
@@ -2611,7 +2599,7 @@ all children of my_foo as if no filter was defined:"
     for (auto &arg_entry : command.entries()) {
       if (arg_entry.ref().empty()) {
         result.AppendError("empty typenames not allowed");
-        return false;
+        return;
       }

       ConstString typeCS(arg_entry.ref());
@@ -2619,12 +2607,11 @@ all children of my_foo as if no filter was defined:"
                      m_options.m_regex ? eRegexFilter : eRegularFilter,
                      m_options.m_category, &error)) {
         result.AppendError(error.AsCString());
-        return false;
+        return;
       }
     }

     result.SetStatus(eReturnStatusSuccessFinishNoResult);
-    return result.Succeeded();
   }
 };

@@ -2730,12 +2717,12 @@ class CommandObjectTypeLookup : public CommandObjectRaw {
     return m_cmd_help_long;
   }

-  bool DoExecute(llvm::StringRef raw_command_line,
+  void DoExecute(llvm::StringRef raw_command_line,
                  CommandReturnObject &result) override {
     if (raw_command_line.empty()) {
       result.AppendError(
           "type lookup cannot be invoked without a type name as argument");
-      return false;
+      return;
     }

     auto exe_ctx = GetCommandInterpreter().GetExecutionContext();
@@ -2747,7 +2734,7 @@ class CommandObjectTypeLookup : public CommandObjectRaw {
     if (args.HasArgs())
       if (!ParseOptionsAndNotify(args.GetArgs(), result, m_option_group,
                                  exe_ctx))
-        return false;
+        return;

     ExecutionContextScope *best_scope = exe_ctx.GetBestExecutionContextScope();
@@ -2827,7 +2814,6 @@ class CommandObjectTypeLookup : public CommandObjectRaw {

     result.SetStatus(any_found ? lldb::eReturnStatusSuccessFinishResult
                                : lldb::eReturnStatusSuccessFinishNoResult);
-    return true;
   }
 };

@@ -2858,13 +2844,13 @@ class CommandObjectFormatterInfo : public CommandObjectRaw {
   ~CommandObjectFormatterInfo() override = default;

 protected:
-  bool DoExecute(llvm::StringRef command,
+  void DoExecute(llvm::StringRef command,
                  CommandReturnObject &result) override {
     TargetSP target_sp = GetDebugger().GetSelectedTarget();
     Thread *thread = GetDefaultThread();
     if (!thread) {
       result.AppendError("no default thread");
-      return false;
+      return;
     }

     StackFrameSP frame_sp =
@@ -2894,10 +2880,8 @@ class CommandObjectFormatterInfo : public CommandObjectRaw {
            << ") " << command << "\n";
        result.SetStatus(lldb::eReturnStatusSuccessFinishNoResult);
      }
-      return true;
    } else {
      result.AppendError("failed to evaluate expression");
-      return false;
    }
  }

diff --git a/lldb/source/Commands/CommandObjectVersion.cpp b/lldb/source/Commands/CommandObjectVersion.cpp
index 9b3c9e67a1a743..f13ec18e240c04 100644
--- a/lldb/source/Commands/CommandObjectVersion.cpp
+++ b/lldb/source/Commands/CommandObjectVersion.cpp
@@ -22,8 +22,7 @@ CommandObjectVersion::CommandObjectVersion(CommandInterpreter &interpreter)

 CommandObjectVersion::~CommandObjectVersion() = default;

-bool CommandObjectVersion::DoExecute(Args &args, CommandReturnObject &result) {
+void CommandObjectVersion::DoExecute(Args &args, CommandReturnObject &result) {
   result.AppendMessageWithFormat("%s\n", lldb_private::GetVersion());
   result.SetStatus(eReturnStatusSuccessFinishResult);
-  return true;
 }

diff --git a/lldb/source/Commands/CommandObjectVersion.h b/lldb/source/Commands/CommandObjectVersion.h
index dce1a8d67b88c8..4ba081bf8706d7 100644
--- a/lldb/source/Commands/CommandObjectVersion.h
+++ b/lldb/source/Commands/CommandObjectVersion.h
@@ -22,7 +22,7 @@ class CommandObjectVersion : public CommandObjectParsed {
   ~CommandObjectVersion() override;

 protected:
-  bool DoExecute(Args &args, CommandReturnObject &result) override;
+  void DoExecute(Args &args, CommandReturnObject &result) override;
 };

 } // namespace lldb_private

diff --git a/lldb/source/Commands/CommandObjectWatchpoint.cpp b/lldb/source/Commands/CommandObjectWatchpoint.cpp
index dc5be0da43f5e6..cd1d226988f243 100644
--- a/lldb/source/Commands/CommandObjectWatchpoint.cpp
+++ b/lldb/source/Commands/CommandObjectWatchpoint.cpp
@@ -207,7 +207,7 @@ class CommandObjectWatchpointList : public CommandObjectParsed {
   };

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target *target = &GetSelectedTarget();

     if (target->GetProcessSP() && target->GetProcessSP()->IsAlive()) {
@@ -230,7 +230,7 @@ class CommandObjectWatchpointList : public CommandObjectParsed {
     if (num_watchpoints == 0) {
       result.AppendMessage("No watchpoints currently set.");
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
-      return true;
+      return;
     }

     Stream &output_stream = result.GetOutputStream();
@@ -249,7 +249,7 @@ class CommandObjectWatchpointList : public CommandObjectParsed {
       if (!CommandObjectMultiwordWatchpoint::VerifyWatchpointIDs(
              target, command, wp_ids)) {
         result.AppendError("Invalid watchpoints specification.");
-        return false;
+        return;
       }

       const size_t size = wp_ids.size();
@@ -260,8 +260,6 @@ class CommandObjectWatchpointList : public CommandObjectParsed {
         result.SetStatus(eReturnStatusSuccessFinishNoResult);
       }
     }
-
-    return result.Succeeded();
   }

 private:
@@ -297,10 +295,10 @@ class CommandObjectWatchpointEnable : public CommandObjectParsed {
   }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target *target = &GetSelectedTarget();
     if (!CheckTargetForWatchpointOperations(target, result))
-      return false;
+      return;

     std::unique_lock lock;
     target->GetWatchpointList().GetListMutex(lock);
@@ -311,7 +309,7 @@ class CommandObjectWatchpointEnable : public CommandObjectParsed {

     if (num_watchpoints == 0) {
       result.AppendError("No watchpoints exist to be enabled.");
-      return false;
+      return;
     }

     if (command.GetArgumentCount() == 0) {
@@ -327,7 +325,7 @@ class CommandObjectWatchpointEnable : public CommandObjectParsed {
       if (!CommandObjectMultiwordWatchpoint::VerifyWatchpointIDs(
              target, command, wp_ids)) {
         result.AppendError("Invalid watchpoints specification.");
-        return false;
+        return;
       }

       int count = 0;
@@ -338,8 +336,6 @@ class CommandObjectWatchpointEnable : public CommandObjectParsed {
       result.AppendMessageWithFormat("%d watchpoints enabled.\n", count);
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
     }
-
-    return result.Succeeded();
   }
 };

@@ -373,10 +369,10 @@ class CommandObjectWatchpointDisable : public CommandObjectParsed {
   }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target *target = &GetSelectedTarget();
     if (!CheckTargetForWatchpointOperations(target, result))
-      return false;
+      return;

     std::unique_lock lock;
     target->GetWatchpointList().GetListMutex(lock);
@@ -386,7 +382,7 @@ class CommandObjectWatchpointDisable : public CommandObjectParsed {

     if (num_watchpoints == 0) {
       result.AppendError("No watchpoints exist to be disabled.");
-      return false;
+      return;
     }

     if (command.GetArgumentCount() == 0) {
@@ -405,7 +401,7 @@ class CommandObjectWatchpointDisable : public CommandObjectParsed {
       if (!CommandObjectMultiwordWatchpoint::VerifyWatchpointIDs(
              target, command, wp_ids)) {
         result.AppendError("Invalid watchpoints specification.");
-        return false;
+        return;
       }

       int count = 0;
@@ -416,8 +412,6 @@ class CommandObjectWatchpointDisable : public CommandObjectParsed {
       result.AppendMessageWithFormat("%d watchpoints disabled.\n", count);
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
     }
-
-    return result.Succeeded();
   }
 };

@@ -489,10 +483,10 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed {
   };

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target *target = &GetSelectedTarget();
     if (!CheckTargetForWatchpointOperations(target, result))
-      return false;
+      return;

     std::unique_lock lock;
     target->GetWatchpointList().GetListMutex(lock);
@@ -503,7 +497,7 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed {

     if (num_watchpoints == 0) {
       result.AppendError("No watchpoints exist to be deleted.");
-      return false;
+      return;
     }

     if (command.empty()) {
@@ -519,7 +513,7 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed {
                                       (uint64_t)num_watchpoints);
       }
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
-      return result.Succeeded();
+      return;
     }

     // Particular watchpoints selected; delete them.
@@ -527,7 +521,7 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed {
     if (!CommandObjectMultiwordWatchpoint::VerifyWatchpointIDs(target, command,
                                                                wp_ids)) {
       result.AppendError("Invalid watchpoints specification.");
-      return false;
+      return;
     }

     int count = 0;
@@ -537,8 +531,6 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed {
         ++count;
     result.AppendMessageWithFormat("%d watchpoints deleted.\n", count);
     result.SetStatus(eReturnStatusSuccessFinishNoResult);
-
-    return result.Succeeded();
   }

 private:
@@ -616,10 +608,10 @@ class CommandObjectWatchpointIgnore : public CommandObjectParsed {
   };

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target *target = &GetSelectedTarget();
     if (!CheckTargetForWatchpointOperations(target, result))
-      return false;
+      return;

     std::unique_lock lock;
     target->GetWatchpointList().GetListMutex(lock);
@@ -630,7 +622,7 @@ class CommandObjectWatchpointIgnore : public CommandObjectParsed {

     if (num_watchpoints == 0) {
       result.AppendError("No watchpoints exist to be ignored.");
-      return false;
+      return;
     }

     if (command.GetArgumentCount() == 0) {
@@ -645,7 +637,7 @@ class CommandObjectWatchpointIgnore : public CommandObjectParsed {
       if (!CommandObjectMultiwordWatchpoint::VerifyWatchpointIDs(
              target, command, wp_ids)) {
         result.AppendError("Invalid watchpoints specification.");
-        return false;
+        return;
       }

       int count = 0;
@@ -656,8 +648,6 @@ class CommandObjectWatchpointIgnore : public CommandObjectParsed {
       result.AppendMessageWithFormat("%d watchpoints ignored.\n", count);
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
     }
-
-    return result.Succeeded();
   }

 private:
@@ -742,10 +732,10 @@ class CommandObjectWatchpointModify : public CommandObjectParsed {
   };

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target *target = &GetSelectedTarget();
     if (!CheckTargetForWatchpointOperations(target, result))
-      return false;
+      return;

     std::unique_lock lock;
     target->GetWatchpointList().GetListMutex(lock);
@@ -756,7 +746,7 @@ class CommandObjectWatchpointModify : public CommandObjectParsed {

     if (num_watchpoints == 0) {
       result.AppendError("No watchpoints exist to be modified.");
-      return false;
+      return;
     }

     if (command.GetArgumentCount() == 0) {
@@ -769,7 +759,7 @@ class CommandObjectWatchpointModify : public CommandObjectParsed {
      if (!CommandObjectMultiwordWatchpoint::VerifyWatchpointIDs(
             target, command, wp_ids)) {
        result.AppendError("Invalid watchpoints specification.");
-        return false;
+        return;
      }

      int count = 0;
@@ -784,8 +774,6 @@ class CommandObjectWatchpointModify : public CommandObjectParsed {
       result.AppendMessageWithFormat("%d watchpoints modified.\n", count);
       result.SetStatus(eReturnStatusSuccessFinishNoResult);
     }
-
-    return result.Succeeded();
   }

 private:
@@ -866,7 +854,7 @@ corresponding to the byte size of the data type.");
     return variable_list.GetSize() - old_size;
   }

-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target *target = GetDebugger().GetSelectedTarget().get();
     StackFrame *frame = m_exe_ctx.GetFramePtr();
@@ -875,7 +863,7 @@ corresponding to the byte size of the data type.");
     if (command.GetArgumentCount() <= 0) {
       result.AppendError("required argument missing; "
                          "specify your program variable to watch for");
-      return false;
+      return;
     }

     // If no '-w' is specified, default to '-w modify'.
@@ -895,7 +883,7 @@ corresponding to the byte size of the data type.");
     // A simple watch variable gesture allows only one argument.
     if (command.GetArgumentCount() != 1) {
       result.AppendError("specify exactly one variable to watch for");
-      return false;
+      return;
     }

     // Things have checked out ok...
@@ -943,7 +931,7 @@ corresponding to the byte size of the data type.");
       result.AppendErrorWithFormat("unable to find any variable "
                                    "expression path that matches '%s'",
                                    command.GetArgumentAtIndex(0));
-      return false;
+      return;
     }

     // Now it's time to create the watchpoint.
@@ -975,7 +963,7 @@ corresponding to the byte size of the data type.");
           addr, static_cast(size), command.GetArgumentAtIndex(0));
       if (const char *error_message = error.AsCString(nullptr))
         result.AppendError(error_message);
-      return result.Succeeded();
+      return;
     }

     watch_sp->SetWatchSpec(command.GetArgumentAtIndex(0));
@@ -994,8 +982,6 @@ corresponding to the byte size of the data type.");
     watch_sp->GetDescription(&output_stream, lldb::eDescriptionLevelFull);
     output_stream.EOL();
     result.SetStatus(eReturnStatusSuccessFinishResult);
-
-    return result.Succeeded();
   }

 private:
@@ -1061,7 +1047,7 @@ class CommandObjectWatchpointSetExpression : public CommandObjectRaw {
   Options *GetOptions() override { return &m_option_group; }

 protected:
-  bool DoExecute(llvm::StringRef raw_command,
+  void DoExecute(llvm::StringRef raw_command,
                  CommandReturnObject &result) override {
     auto exe_ctx = GetCommandInterpreter().GetExecutionContext();
     m_option_group.NotifyOptionParsingStarting(
@@ -1077,14 +1063,14 @@ class CommandObjectWatchpointSetExpression : public CommandObjectRaw {
     if (args.HasArgs())
       if (!ParseOptionsAndNotify(args.GetArgs(), result, m_option_group,
                                  exe_ctx))
-        return false;
+        return;

     // If no argument is present, issue an error message.  There's no way to
     // set a watchpoint.
     if (raw_command.trim().empty()) {
       result.AppendError("required argument missing; specify an expression "
                          "to evaluate into the address to watch for");
-      return false;
+      return;
     }

     // If no '-w' is specified, default to '-w write'.
@@ -1116,7 +1102,7 @@ class CommandObjectWatchpointSetExpression : public CommandObjectRaw {
       result.AppendErrorWithFormat("expression evaluated: \n%s", expr.data());
       if (valobj_sp && !valobj_sp->GetError().Success())
         result.AppendError(valobj_sp->GetError().AsCString());
-      return false;
+      return;
     }

     // Get the address to watch.
@@ -1124,7 +1110,7 @@ class CommandObjectWatchpointSetExpression : public CommandObjectRaw {
     addr = valobj_sp->GetValueAsUnsigned(0, &success);
     if (!success) {
       result.AppendError("expression did not evaluate to an address");
-      return false;
+      return;
     }

     if (m_option_watchpoint.watch_size != 0)
@@ -1173,8 +1159,6 @@ class CommandObjectWatchpointSetExpression : public CommandObjectRaw {
       if (error.AsCString(nullptr))
         result.AppendError(error.AsCString());
     }
-
-    return result.Succeeded();
   }

 private:

diff --git a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp
index 37052ddd62c886..b1629ceab27098 100644
--- a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp
+++ b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp
@@ -366,7 +366,7 @@ are no syntax errors may indicate that a function was declared but never called.
   };

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target *target = &GetSelectedTarget();

     const WatchpointList &watchpoints = target->GetWatchpointList();
@@ -374,7 +374,7 @@ are no syntax errors may indicate that a function was declared but never called.

     if (num_watchpoints == 0) {
       result.AppendError("No watchpoints exist to have commands added");
-      return false;
+      return;
     }

     if (!m_options.m_function_name.empty()) {
@@ -388,7 +388,7 @@ are no syntax errors may indicate that a function was declared but never called.
     if (!CommandObjectMultiwordWatchpoint::VerifyWatchpointIDs(target, command,
                                                                valid_wp_ids)) {
       result.AppendError("Invalid watchpoints specification.");
-      return false;
+      return;
     }

     result.SetStatus(eReturnStatusSuccessFinishNoResult);
@@ -441,8 +441,6 @@ are no syntax errors may indicate that a function was declared but never called.
        }
      }
    }
-
-    return result.Succeeded();
   }

 private:
@@ -475,7 +473,7 @@ class CommandObjectWatchpointCommandDelete : public CommandObjectParsed {
   ~CommandObjectWatchpointCommandDelete() override = default;

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target *target = &GetSelectedTarget();

     const WatchpointList &watchpoints = target->GetWatchpointList();
@@ -483,20 +481,20 @@ class CommandObjectWatchpointCommandDelete : public CommandObjectParsed {

     if (num_watchpoints == 0) {
       result.AppendError("No watchpoints exist to have commands deleted");
-      return false;
+      return;
     }

     if (command.GetArgumentCount() == 0) {
       result.AppendError(
           "No watchpoint specified from which to delete the commands");
-      return false;
+      return;
     }

     std::vector valid_wp_ids;
     if (!CommandObjectMultiwordWatchpoint::VerifyWatchpointIDs(target, command,
                                                                valid_wp_ids)) {
       result.AppendError("Invalid watchpoints specification.");
-      return false;
+      return;
     }

     result.SetStatus(eReturnStatusSuccessFinishNoResult);
@@ -509,10 +507,9 @@ class CommandObjectWatchpointCommandDelete : public CommandObjectParsed {
         wp->ClearCallback();
       } else {
         result.AppendErrorWithFormat("Invalid watchpoint ID: %u.\n", cur_wp_id);
-        return false;
+        return;
       }
     }
-    return result.Succeeded();
   }
 };

@@ -543,7 +540,7 @@ class CommandObjectWatchpointCommandList : public CommandObjectParsed {
   ~CommandObjectWatchpointCommandList() override = default;

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     Target *target = &GetSelectedTarget();

     const WatchpointList &watchpoints = target->GetWatchpointList();
@@ -551,20 +548,20 @@ class CommandObjectWatchpointCommandList : public CommandObjectParsed {

     if (num_watchpoints == 0) {
       result.AppendError("No watchpoints exist for which to list commands");
-      return false;
+      return;
     }

     if (command.GetArgumentCount() == 0) {
       result.AppendError(
           "No watchpoint specified for which to list the commands");
-      return false;
+      return;
     }

     std::vector valid_wp_ids;
     if (!CommandObjectMultiwordWatchpoint::VerifyWatchpointIDs(target, command,
                                                                valid_wp_ids)) {
       result.AppendError("Invalid watchpoints specification.");
-      return false;
+      return;
     }

     result.SetStatus(eReturnStatusSuccessFinishNoResult);
@@ -598,8 +595,6 @@ class CommandObjectWatchpointCommandList : public CommandObjectParsed {
        }
      }
    }
-
-    return result.Succeeded();
   }
 };

diff --git a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp
index 711a696ff9b4d6..53e856bf3514e0 100644
--- a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp
@@ -339,7 +339,7 @@ class CommandObjectMultiwordItaniumABI_Demangle : public CommandObjectParsed {
   ~CommandObjectMultiwordItaniumABI_Demangle() override = default;

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     bool demangled_any = false;
     bool error_any = false;
     for (auto &entry : command.entries()) {
@@ -372,7 +372,7 @@ class CommandObjectMultiwordItaniumABI_Demangle : public CommandObjectParsed {
         error_any ? lldb::eReturnStatusFailed
                   : (demangled_any ? lldb::eReturnStatusSuccessFinishResult
                                    : lldb::eReturnStatusSuccessFinishNoResult));
-    return result.Succeeded();
   }
 };

diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
index a50cdc88cd0124..1fd7d027731de0 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
@@ -917,7 +917,7 @@ class CommandObjectObjC_ClassTable_Dump : public CommandObjectParsed {
   Options *GetOptions() override { return &m_options; }

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     std::unique_ptr regex_up;
     switch (command.GetArgumentCount()) {
     case 0:
@@ -929,14 +929,14 @@ class CommandObjectObjC_ClassTable_Dump : public CommandObjectParsed {
         result.AppendError(
             "invalid argument - please provide a valid regular expression");
         result.SetStatus(lldb::eReturnStatusFailed);
-        return false;
+        return;
       }
       break;
     }
     default: {
       result.AppendError("please provide 0 or 1 arguments");
       result.SetStatus(lldb::eReturnStatusFailed);
-      return false;
+      return;
     }
     }

@@ -997,11 +997,10 @@ class CommandObjectObjC_ClassTable_Dump : public CommandObjectParsed {
         }
       }
       result.SetStatus(lldb::eReturnStatusSuccessFinishResult);
-      return true;
+      return;
     }
     result.AppendError("current process has no Objective-C runtime loaded");
     result.SetStatus(lldb::eReturnStatusFailed);
-    return false;
   }

   CommandOptions m_options;
@@ -1034,11 +1033,11 @@ class CommandObjectMultiwordObjC_TaggedPointer_Info
   ~CommandObjectMultiwordObjC_TaggedPointer_Info() override = default;

 protected:
-  bool DoExecute(Args &command, CommandReturnObject &result) override {
+  void DoExecute(Args &command, CommandReturnObject &result) override {
     if (command.GetArgumentCount() == 0) {
       result.AppendError("this command requires arguments");
       result.SetStatus(lldb::eReturnStatusFailed);
-      return false;
+      return;
     }

     Process *process = m_exe_ctx.GetProcessPtr();
@@ -1048,7 +1047,7 @@ class CommandObjectMultiwordObjC_TaggedPointer_Info
     if (!objc_runtime) {
       result.AppendError("current process has no Objective-C runtime loaded");
       result.SetStatus(lldb::eReturnStatusFailed);
-      return false;
+      return;
     }

     ObjCLanguageRuntime::TaggedPointerVendor *tagged_ptr_vendor =
@@ -1056,7 +1055,7 @@ class CommandObjectMultiwordObjC_TaggedPointer_Info
     if (!tagged_ptr_vendor) {
       result.AppendError("current process has no tagged pointer support");
       result.SetStatus(lldb::eReturnStatusFailed);
-      return false;
+      return;
     }

     for (size_t i = 0; i < command.GetArgumentCount(); i++) {
@@ -1071,7 +1070,7 @@ class CommandObjectMultiwordObjC_TaggedPointer_Info
         result.AppendErrorWithFormatv(
             "could not convert '{0}' to a valid address\n", arg_str);
         result.SetStatus(lldb::eReturnStatusFailed);
-        return false;
+        return;
       }

       if (!tagged_ptr_vendor->IsPossibleTaggedPointer(arg_addr)) {
@@ -1084,7 +1083,7 @@ class CommandObjectMultiwordObjC_TaggedPointer_Info
         result.AppendErrorWithFormatv(
             "could not get class descriptor for {0:x16}\n", arg_addr);
         result.SetStatus(lldb::eReturnStatusFailed);
-        return false;
+        return;
       }

       uint64_t info_bits = 0;
@@ -1106,7 +1105,6 @@ class CommandObjectMultiwordObjC_TaggedPointer_Info
     }

     result.SetStatus(lldb::eReturnStatusSuccessFinishResult);
-    return true;
   }
 };

diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp
b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp index 79f8b15a7f229c..0d1caf4d7318b7 100644 --- a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp +++ b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp @@ -881,7 +881,7 @@ class CommandObjectProcessKDPPacketSend : public CommandObjectParsed { ~CommandObjectProcessKDPPacketSend() override = default; - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { if (!m_command_byte.GetOptionValue().OptionWasSet()) { result.AppendError( "the --command option must be set to a valid command byte"); @@ -907,7 +907,7 @@ class CommandObjectProcessKDPPacketSend : public CommandObjectParsed { "even number of ASCII hex " "characters: '%s'", ascii_hex_bytes_cstr); - return false; + return; } payload_bytes.resize(ascii_hex_bytes_cstr_len / 2); if (extractor.GetHexBytes(payload_bytes, '\xdd') != @@ -916,7 +916,7 @@ class CommandObjectProcessKDPPacketSend : public CommandObjectParsed { "ASCII hex characters (no " "spaces or hex prefixes): '%s'", ascii_hex_bytes_cstr); - return false; + return; } } Status error; @@ -934,7 +934,7 @@ class CommandObjectProcessKDPPacketSend : public CommandObjectParsed { endian::InlHostByteOrder(), endian::InlHostByteOrder()); result.AppendMessage(packet.GetString()); result.SetStatus(eReturnStatusSuccessFinishResult); - return true; + return; } else { const char *error_cstr = error.AsCString(); if (error_cstr && error_cstr[0]) @@ -942,7 +942,7 @@ class CommandObjectProcessKDPPacketSend : public CommandObjectParsed { else result.AppendErrorWithFormat("unknown error 0x%8.8x", error.GetError()); - return false; + return; } } else { result.AppendErrorWithFormat("process must be stopped in order " @@ -958,7 +958,6 @@ class CommandObjectProcessKDPPacketSend : public CommandObjectParsed { command_byte); } } - return false; } }; diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 56fc5490657ea7..dad1396698050d 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -5179,7 +5179,7 @@ class CommandObjectProcessGDBRemoteSpeedTest : public CommandObjectParsed { Options *GetOptions() override { return &m_option_group; } - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); if (argc == 0) { ProcessGDBRemote *process = @@ -5201,14 +5201,13 @@ class CommandObjectProcessGDBRemoteSpeedTest : public CommandObjectParsed { num_packets, max_send, max_recv, k_recv_amount, json, output_stream_sp ? 
*output_stream_sp : result.GetOutputStream()); result.SetStatus(eReturnStatusSuccessFinishResult); - return true; + return; } } else { result.AppendErrorWithFormat("'%s' takes no arguments", m_cmd_name.c_str()); } result.SetStatus(eReturnStatusFailed); - return false; } protected: @@ -5228,16 +5227,15 @@ class CommandObjectProcessGDBRemotePacketHistory : public CommandObjectParsed { ~CommandObjectProcessGDBRemotePacketHistory() override = default; - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { ProcessGDBRemote *process = (ProcessGDBRemote *)m_interpreter.GetExecutionContext().GetProcessPtr(); if (process) { process->DumpPluginHistory(result.GetOutputStream()); result.SetStatus(eReturnStatusSuccessFinishResult); - return true; + return; } result.SetStatus(eReturnStatusFailed); - return false; } }; @@ -5255,14 +5253,14 @@ class CommandObjectProcessGDBRemotePacketXferSize : public CommandObjectParsed { ~CommandObjectProcessGDBRemotePacketXferSize() override = default; - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); if (argc == 0) { result.AppendErrorWithFormat("'%s' takes an argument to specify the max " "amount to be transferred when " "reading/writing", m_cmd_name.c_str()); - return false; + return; } ProcessGDBRemote *process = @@ -5274,11 +5272,10 @@ class CommandObjectProcessGDBRemotePacketXferSize : public CommandObjectParsed { if (errno == 0 && user_specified_max != 0) { process->SetUserSpecifiedMaxMemoryTransferSize(user_specified_max); result.SetStatus(eReturnStatusSuccessFinishResult); - return true; + return; } } result.SetStatus(eReturnStatusFailed); - return false; } }; @@ -5299,13 +5296,13 @@ class CommandObjectProcessGDBRemotePacketSend : public CommandObjectParsed { ~CommandObjectProcessGDBRemotePacketSend() override = default; - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); if (argc == 0) { result.AppendErrorWithFormat( "'%s' takes a one or more packet content arguments", m_cmd_name.c_str()); - return false; + return; } ProcessGDBRemote *process = @@ -5331,7 +5328,6 @@ class CommandObjectProcessGDBRemotePacketSend : public CommandObjectParsed { output_strm.Printf("response: %s\n", response.GetStringRef().data()); } } - return true; } }; @@ -5348,12 +5344,12 @@ class CommandObjectProcessGDBRemotePacketMonitor : public CommandObjectRaw { ~CommandObjectProcessGDBRemotePacketMonitor() override = default; - bool DoExecute(llvm::StringRef command, + void DoExecute(llvm::StringRef command, CommandReturnObject &result) override { if (command.empty()) { result.AppendErrorWithFormat("'%s' takes a command string argument", m_cmd_name.c_str()); - return false; + return; } ProcessGDBRemote *process = @@ -5377,7 +5373,6 @@ class CommandObjectProcessGDBRemotePacketMonitor : public CommandObjectRaw { else output_strm.Printf("response: %s\n", response.GetStringRef().data()); } - return true; } }; diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp index 99d0b54c40f952..0d5ca42691d3d4 100644 --- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp +++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp @@ -795,12 +795,12 @@ 
class CommandObjectProcessMinidumpDump : public CommandObjectParsed { Options *GetOptions() override { return &m_option_group; } - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); if (argc > 0) { result.AppendErrorWithFormat("'%s' take no arguments, only options", m_cmd_name.c_str()); - return false; + return; } SetDefaultOptionsIfNoneAreSet(); @@ -904,9 +904,7 @@ class CommandObjectProcessMinidumpDump : public CommandObjectParsed { DumpTextStream(StreamType::FacebookThreadName, "Facebook Thread Name"); if (DumpFacebookLogcat()) - DumpTextStream(StreamType::FacebookLogcat, - "Facebook Logcat"); - return true; + DumpTextStream(StreamType::FacebookLogcat, "Facebook Logcat"); } }; diff --git a/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp b/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp index f8a8df84ca37f2..c46dc54c912e51 100644 --- a/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp +++ b/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp @@ -766,7 +766,7 @@ class EnableCommand : public CommandObjectParsed { result.AppendWarning(stream.GetString()); } - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { // First off, set the global sticky state of enable/disable based on this // command execution. s_is_explicitly_enabled = m_enable; @@ -790,14 +790,14 @@ class EnableCommand : public CommandObjectParsed { if (!process_sp) { // No active process, so there is nothing more to do right now. result.SetStatus(eReturnStatusSuccessFinishNoResult); - return true; + return; } // If the process is no longer alive, we can't do this now. We'll catch it // the next time the process is started up. if (!process_sp->IsAlive()) { result.SetStatus(eReturnStatusSuccessFinishNoResult); - return true; + return; } // Get the plugin for the process. @@ -838,7 +838,6 @@ class EnableCommand : public CommandObjectParsed { // one this command is setup to do. plugin.SetEnabled(m_enable); } - return result.Succeeded(); } Options *GetOptions() override { @@ -861,7 +860,7 @@ class StatusCommand : public CommandObjectParsed { "plugin structured-data darwin-log status") {} protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { auto &stream = result.GetOutputStream(); // Figure out if we've got a process. If so, we can tell if DarwinLog is @@ -891,7 +890,7 @@ class StatusCommand : public CommandObjectParsed { if (!options_sp) { // Nothing more to do. result.SetStatus(eReturnStatusSuccessFinishResult); - return true; + return; } // Print filter rules @@ -924,7 +923,6 @@ class StatusCommand : public CommandObjectParsed { options_sp->GetFallthroughAccepts() ? 
"accept" : "reject"); result.SetStatus(eReturnStatusSuccessFinishResult); - return true; } }; diff --git a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h index 254baaf3e67367..82714dea3fcdba 100644 --- a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h +++ b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h @@ -105,7 +105,7 @@ class CommandObjectProcessTraceStartIntelPT : public CommandObjectParsed { Options *GetOptions() override { return &m_options; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override; + void DoExecute(Args &command, CommandReturnObject &result) override; TraceIntelPT &m_trace; CommandOptions m_options; diff --git a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp index 33d05ee2ac1378..ee8970fb4de278 100644 --- a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp +++ b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp @@ -62,7 +62,7 @@ CommandObjectThreadTraceExportCTF::CommandOptions::GetDefinitions() { return llvm::ArrayRef(g_thread_trace_export_ctf_options); } -bool CommandObjectThreadTraceExportCTF::DoExecute(Args &command, +void CommandObjectThreadTraceExportCTF::DoExecute(Args &command, CommandReturnObject &result) { const TraceSP &trace_sp = m_exe_ctx.GetTargetSP()->GetTrace(); Process *process = m_exe_ctx.GetProcessPtr(); @@ -78,7 +78,6 @@ bool CommandObjectThreadTraceExportCTF::DoExecute(Args &command, result.AppendErrorWithFormatv( "Thread index {0} is out of range (valid values are 1 - {1}).\n", tid, num_threads); - return false; } else { auto do_work = [&]() -> Error { Expected cursor = trace_sp->CreateNewCursor(*thread); @@ -91,9 +90,6 @@ bool CommandObjectThreadTraceExportCTF::DoExecute(Args &command, if (llvm::Error err = do_work()) { result.AppendErrorWithFormat("%s\n", toString(std::move(err)).c_str()); - return false; - } else { - return true; } } } diff --git a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.h b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.h index c9f02a372dedae..1a034e87cfb65b 100644 --- a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.h +++ b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.h @@ -48,7 +48,7 @@ class CommandObjectThreadTraceExportCTF : public CommandObjectParsed { Options *GetOptions() override { return &m_options; } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override; + void DoExecute(Args &command, CommandReturnObject &result) override; CommandOptions m_options; }; diff --git a/lldb/unittests/Interpreter/TestCommandPaths.cpp b/lldb/unittests/Interpreter/TestCommandPaths.cpp index 78948ae5b70658..0f0a2791ebb808 100644 --- a/lldb/unittests/Interpreter/TestCommandPaths.cpp +++ b/lldb/unittests/Interpreter/TestCommandPaths.cpp @@ -48,10 +48,9 @@ class CommandObjectLeaf : public CommandObjectParsed { } protected: - bool DoExecute(Args &command, CommandReturnObject &result) override { + void DoExecute(Args &command, CommandReturnObject &result) override { result.SetStatus(eReturnStatusSuccessFinishResult); result.AppendMessage("I did nothing"); - return true; } }; From 1abc931d469bca47193efa4d0676776c31438f3e Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Mon, 30 Oct 2023 13:35:25 
-0700 Subject: [PATCH 078/144] [DirectX] Remove redundant resource kind handling. NFC (#70691) The frontend passes this along since https://reviews.llvm.org/D135335 --- llvm/lib/Target/DirectX/DXILResource.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXILResource.cpp b/llvm/lib/Target/DirectX/DXILResource.cpp index dde7255e04254e..0390a3f0a558d8 100644 --- a/llvm/lib/Target/DirectX/DXILResource.cpp +++ b/llvm/lib/Target/DirectX/DXILResource.cpp @@ -260,20 +260,6 @@ void UAVResource::print(raw_ostream &OS) const { // https://github.com/llvm/llvm-project/issues/57991). void UAVResource::parseSourceType(StringRef S) { IsROV = S.startswith("RasterizerOrdered"); - if (IsROV) - S = S.substr(strlen("RasterizerOrdered")); - if (S.startswith("RW")) - S = S.substr(strlen("RW")); - - // Note: I'm deliberately not handling any of the Texture buffer types at the - // moment. I want to resolve the issue above before adding Texture or Sampler - // support. - Shape = StringSwitch(S) - .StartsWith("Buffer<", Kinds::TypedBuffer) - .StartsWith("ByteAddressBuffer<", Kinds::RawBuffer) - .StartsWith("StructuredBuffer<", Kinds::StructuredBuffer) - .Default(Kinds::Invalid); - assert(Shape != Kinds::Invalid && "Unsupported buffer type"); S = S.substr(S.find("<") + 1); From c92c86f66a72c2a07dce4976ab26446e5f272875 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 30 Oct 2023 13:21:56 -0700 Subject: [PATCH 079/144] [RISCV] Add test coverage for "zext nneg" [nfc] This IR feature was recently added in #67982. An upcoming change will improve our lowering on these examples. --- llvm/test/CodeGen/RISCV/sext-zext-trunc.ll | 158 +++++++++++++++++++++ 1 file changed, 158 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll index 98488c9a589a3a..7297bfaf0c62ec 100644 --- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll +++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll @@ -350,6 +350,164 @@ define i64 @zext_i32_to_i64(i32 %a) nounwind { ret i64 %1 } +define i8 @zext_nneg_i1_to_i8(i1 %a) nounwind { +; RV32I-LABEL: zext_nneg_i1_to_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: ret +; +; RV64-LABEL: zext_nneg_i1_to_i8: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: ret + %1 = zext nneg i1 %a to i8 + ret i8 %1 +} + +define i16 @zext_nneg_i1_to_i16(i1 %a) nounwind { +; RV32I-LABEL: zext_nneg_i1_to_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: ret +; +; RV64-LABEL: zext_nneg_i1_to_i16: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: ret + %1 = zext nneg i1 %a to i16 + ret i16 %1 +} + +define i32 @zext_nneg_i1_to_i32(i1 %a) nounwind { +; RV32I-LABEL: zext_nneg_i1_to_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: ret +; +; RV64-LABEL: zext_nneg_i1_to_i32: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: ret + %1 = zext nneg i1 %a to i32 + ret i32 %1 +} + +define i64 @zext_nneg_i1_to_i64(i1 %a) nounwind { +; RV32I-LABEL: zext_nneg_i1_to_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: ret +; +; RV64-LABEL: zext_nneg_i1_to_i64: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: ret + %1 = zext nneg i1 %a to i64 + ret i64 %1 +} + +define i16 @zext_nneg_i8_to_i16(i8 %a) nounwind { +; RV32I-LABEL: zext_nneg_i8_to_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: ret +; +; RV64-LABEL: zext_nneg_i8_to_i16: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 255 
+; RV64-NEXT: ret + %1 = zext nneg i8 %a to i16 + ret i16 %1 +} + +define i32 @zext_nneg_i8_to_i32(i8 %a) nounwind { +; RV32I-LABEL: zext_nneg_i8_to_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: ret +; +; RV64-LABEL: zext_nneg_i8_to_i32: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 255 +; RV64-NEXT: ret + %1 = zext nneg i8 %a to i32 + ret i32 %1 +} + +define i64 @zext_nneg_i8_to_i64(i8 %a) nounwind { +; RV32I-LABEL: zext_nneg_i8_to_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: ret +; +; RV64-LABEL: zext_nneg_i8_to_i64: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, 255 +; RV64-NEXT: ret + %1 = zext nneg i8 %a to i64 + ret i64 %1 +} + +define i32 @zext_nneg_i16_to_i32(i16 %a) nounwind { +; RV32I-LABEL: zext_nneg_i16_to_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: zext_nneg_i16_to_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: zext_nneg_i16_to_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: ret + %1 = zext nneg i16 %a to i32 + ret i32 %1 +} + +define i64 @zext_nneg_i16_to_i64(i16 %a) nounwind { +; RV32I-LABEL: zext_nneg_i16_to_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: zext_nneg_i16_to_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: zext_nneg_i16_to_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: ret + %1 = zext nneg i16 %a to i64 + ret i64 %1 +} + +define i64 @zext_nneg_i32_to_i64(i32 %a) nounwind { +; RV32I-LABEL: zext_nneg_i32_to_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: ret +; +; RV64-LABEL: zext_nneg_i32_to_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: ret + %1 = zext nneg i32 %a to i64 + ret i64 %1 +} + define i1 @trunc_i8_to_i1(i8 %a) nounwind { ; RV32I-LABEL: trunc_i8_to_i1: ; RV32I: # %bb.0: From 71bf052ec90e77cb4aa66505d47cbc4b6016ac1d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 30 Oct 2023 13:33:37 -0700 Subject: [PATCH 080/144] [SLP][NFC]Add a test for bool logic ops reduction, NFC. 
--- .../X86/reduction-bool-logic-op-inside.ll | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll new file mode 100644 index 00000000000000..2b5f62bdf98943 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S < %s --passes=slp-vectorizer | FileCheck %s + +define i1 @test(i32 %x) { +; CHECK-LABEL: define i1 @test( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], 1 +; CHECK-NEXT: [[OP_RDX:%.*]] = or i1 poison, [[CMP]] +; CHECK-NEXT: ret i1 [[OP_RDX]] +; + %cmp = icmp sgt i32 %x, 1 + %sel1 = select i1 %cmp, i1 true, i1 poison + %sel2 = select i1 %sel1, i1 true, i1 poison + %sel3 = select i1 %sel2, i1 true, i1 poison + %sel4 = select i1 %cmp, i1 true, i1 poison + %ret = or i1 %sel3, %sel4 + ret i1 %ret +} + From 55c9f24344a49cd1deb86af1d79d4dc3a798c6fb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 30 Oct 2023 13:48:27 -0700 Subject: [PATCH 081/144] [CVP] Infer nneg on zext when forming from non-negative sext. (#70715) Builds on #67982 which recently introduced the nneg flag on a zext instruction. --- llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 1 + .../test/Transforms/CorrelatedValuePropagation/cond-at-use.ll | 2 +- llvm/test/Transforms/CorrelatedValuePropagation/sext.ll | 4 ++-- llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 9043c434313fed..99b4628cc68dd0 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -1017,6 +1017,7 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) { auto *ZExt = CastInst::CreateZExtOrBitCast(Base, SDI->getType(), "", SDI); ZExt->takeName(SDI); ZExt->setDebugLoc(SDI->getDebugLoc()); + ZExt->setNonNeg(); SDI->replaceAllUsesWith(ZExt); SDI->eraseFromParent(); diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/cond-at-use.ll b/llvm/test/Transforms/CorrelatedValuePropagation/cond-at-use.ll index 8e5ec878bb894f..7ec1028d65e0e9 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/cond-at-use.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/cond-at-use.ll @@ -519,7 +519,7 @@ define i16 @ashr_convert(i16 noundef %x, i16 %y) { define i32 @sext_convert(i16 noundef %x) { ; CHECK-LABEL: @sext_convert( -; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[X:%.*]] to i32 +; CHECK-NEXT: [[EXT:%.*]] = zext nneg i16 [[X:%.*]] to i32 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i16 [[X]], 0 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[EXT]], i32 24 ; CHECK-NEXT: ret i32 [[SEL]] diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/sext.ll b/llvm/test/Transforms/CorrelatedValuePropagation/sext.ll index 62e0bc036e769c..0db520bfc68a22 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/sext.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/sext.ll @@ -18,7 +18,7 @@ define void @test1(i32 %n) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], -1 ; CHECK-NEXT: br i1 [[CMP]], label 
[[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[EXT_WIDE1:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: [[EXT_WIDE1:%.*]] = zext nneg i32 [[A]] to i64 ; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE1]]) ; CHECK-NEXT: [[EXT]] = trunc i64 [[EXT_WIDE1]] to i32 ; CHECK-NEXT: br label [[FOR_COND]] @@ -85,7 +85,7 @@ define void @test3(i32 %n) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[EXT_WIDE1:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[EXT_WIDE1:%.*]] = zext nneg i32 [[N]] to i64 ; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE1]]) ; CHECK-NEXT: [[EXT:%.*]] = trunc i64 [[EXT_WIDE1]] to i32 ; CHECK-NEXT: br label [[EXIT]] diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll index 2150026ab5e0df..77f53ad56e1cc4 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll @@ -12,8 +12,8 @@ define dso_local void @_Z3fooPiii(ptr %A, i32 %N, i32 %M) #0 { ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP3]], i1 [[CMP21]], i1 false ; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.cond1.preheader.lr.ph.split.us: -; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[M]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[M]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[N]] to i64 ; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul nuw nsw i64 [[TMP0]], [[TMP1]] ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.us: From 8ca565cd3b68929c0691fd97a6d0f7a52d3e3cd7 Mon Sep 17 00:00:00 2001 From: michaelrj-google <71531609+michaelrj-google@users.noreply.github.com> Date: Mon, 30 Oct 2023 14:04:00 -0700 Subject: [PATCH 082/144] [libc] Fix printf long double truncation bound (#70705) The calculation of whether a number being printed is truncated and should be rounded up assumed a double for one of its constants, causing occasional misrounding. This fixes that by making the constant based on the mantissa width. --- libc/src/stdio/printf_core/float_dec_converter.h | 12 +++++++----- libc/test/src/stdio/sprintf_test.cpp | 3 +++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/libc/src/stdio/printf_core/float_dec_converter.h b/libc/src/stdio/printf_core/float_dec_converter.h index 8c5ba0d241eda5..0e152a26025642 100644 --- a/libc/src/stdio/printf_core/float_dec_converter.h +++ b/libc/src/stdio/printf_core/float_dec_converter.h @@ -85,11 +85,13 @@ LIBC_INLINE RoundDirection get_round_direction(int last_digit, bool truncated, template LIBC_INLINE constexpr cpp::enable_if_t, bool> -zero_after_digits(int32_t base_2_exp, int32_t digits_after_point, T mantissa) { +zero_after_digits(int32_t base_2_exp, int32_t digits_after_point, T mantissa, + const int32_t mant_width) { const int32_t required_twos = -base_2_exp - digits_after_point - 1; + // Add 8 to mant width since this is a loose bound.
const bool has_trailing_zeros = - required_twos <= 0 || - (required_twos < 60 && + (required_twos < (mant_width + 8) && multiple_of_power_of_2(mantissa, static_cast(required_twos))); return has_trailing_zeros; } @@ -568,7 +570,7 @@ LIBC_INLINE int convert_float_decimal_typed(Writer *writer, RoundDirection round; const bool truncated = !zero_after_digits(exponent - MANT_WIDTH, precision, - float_bits.get_explicit_mantissa()); + float_bits.get_explicit_mantissa(), MANT_WIDTH); round = get_round_direction(last_digit, truncated, is_negative); RET_IF_RESULT_NEGATIVE( @@ -733,7 +735,7 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer, // Use the formula from %f. truncated = !zero_after_digits(exponent - MANT_WIDTH, precision - final_exponent, - float_bits.get_explicit_mantissa()); + float_bits.get_explicit_mantissa(), MANT_WIDTH); } } round = get_round_direction(last_digit, truncated, is_negative); @@ -979,7 +981,7 @@ LIBC_INLINE int convert_float_dec_auto_typed(Writer *writer, // Use the formula from %f. truncated = !zero_after_digits(exponent - MANT_WIDTH, exp_precision - base_10_exp, - float_bits.get_explicit_mantissa()); + float_bits.get_explicit_mantissa(), MANT_WIDTH); } } diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp index b2c321c0b15c9d..a8fe8f2557c8ef 100644 --- a/libc/test/src/stdio/sprintf_test.cpp +++ b/libc/test/src/stdio/sprintf_test.cpp @@ -1041,6 +1041,9 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { written = LIBC_NAMESPACE::sprintf(buff, "%Lf", 1.0L); ASSERT_STREQ_LEN(written, buff, "1.000000"); + written = LIBC_NAMESPACE::sprintf(buff, "%.Lf", -2.5L); + ASSERT_STREQ_LEN(written, buff, "-2"); + #if defined(SPECIAL_X86_LONG_DOUBLE) written = LIBC_NAMESPACE::sprintf(buff, "%Lf", 1e100L); From 428af867d89eb28b09e80c6826c4c6daad1ba8cc Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Mon, 30 Oct 2023 14:04:15 -0700 Subject: [PATCH 083/144] [DirectX] Update test after `opt` learned to infer datalayout (#70726) Since e39f6c1844fa "[opt] Infer DataLayout from triple if not specified", this test (correctly) emits a load of an i64 with 8 byte alignment, rather than with 4 byte alignment. --- llvm/test/CodeGen/DirectX/typed_ptr.ll | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/DirectX/typed_ptr.ll b/llvm/test/CodeGen/DirectX/typed_ptr.ll index 2975d85b9c9644..5453e87651dd72 100644 --- a/llvm/test/CodeGen/DirectX/typed_ptr.ll +++ b/llvm/test/CodeGen/DirectX/typed_ptr.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -S -dxil-prepare < %s | FileCheck %s target triple = "dxil-unknown-unknown" @@ -7,7 +8,7 @@ define i64 @test(i64* %p) { ; CHECK-LABEL: define i64 @test( ; CHECK-SAME: ptr [[P:%.*]]) { -; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P]], align 4 +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P]], align 8 ; CHECK-NEXT: ret i64 [[V]] ; %v = load i64, i64* %p From 9da19e4340f21455b52d5768439cfbaca4112fe4 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 30 Oct 2023 13:12:19 -0700 Subject: [PATCH 084/144] [SLP]Fix PR70507: correctly handle bool logical ops in reductions. If the very first reduction operation is not a bool logical op, but some others are, we still need to emit the bool logic op for all the extra reduction operations to avoid incorrect poison propagation.
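An illustrative sketch (not part of the patch; the value names %cmp and %other are invented): in LLVM IR a plain `or i1` is poison whenever either operand is poison, while the select form of a logical op ignores its second operand once the first one decides the result, which is why the select form must be emitted as soon as any of the reduced operations is a logical op:

  %unsafe = or i1 %cmp, %other                ; poison whenever %other is poison
  %safe = select i1 %cmp, i1 true, i1 %other  ; true when %cmp is true, even if %other is poison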
--- .../Transforms/Vectorize/SLPVectorizer.cpp | 20 +++++++++++++++---- .../X86/reduction-bool-logic-op-inside.ll | 2 +- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b6895c649f838c..4bb6301f4612f5 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -13667,10 +13667,12 @@ class HorizontalReduction { static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS, Value *RHS, const Twine &Name, const ReductionOpsListType &ReductionOps) { - bool UseSelect = ReductionOps.size() == 2 || - // Logical or/and. - (ReductionOps.size() == 1 && - isa(ReductionOps.front().front())); + bool UseSelect = + ReductionOps.size() == 2 || + // Logical or/and. + (ReductionOps.size() == 1 && any_of(ReductionOps.front(), [](Value *V) { + return isa(V); + })); assert((!UseSelect || ReductionOps.size() != 2 || isa(ReductionOps[1][0])) && "Expected cmp + select pairs for reduction"); @@ -14104,6 +14106,16 @@ class HorizontalReduction { // Update the final value in the reduction. Builder.SetCurrentDebugLocation( cast(ReductionOps.front().front())->getDebugLoc()); + if ((isa(VectorizedTree) && !isa(Res)) || + (isGuaranteedNotToBePoison(Res) && + !isGuaranteedNotToBePoison(VectorizedTree))) { + auto It = ReducedValsToOps.find(Res); + if (It != ReducedValsToOps.end() && + any_of(It->getSecond(), + [](Instruction *I) { return isBoolLogicOp(I); })) + std::swap(VectorizedTree, Res); + } + return createOp(Builder, RdxKind, VectorizedTree, Res, "op.rdx", ReductionOps); } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll index 2b5f62bdf98943..b66967d183cacc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll @@ -5,7 +5,7 @@ define i1 @test(i32 %x) { ; CHECK-LABEL: define i1 @test( ; CHECK-SAME: i32 [[X:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], 1 -; CHECK-NEXT: [[OP_RDX:%.*]] = or i1 poison, [[CMP]] +; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[CMP]], i1 true, i1 poison ; CHECK-NEXT: ret i1 [[OP_RDX]] ; %cmp = icmp sgt i32 %x, 1 From 6995183e174280f3987858bd13a4eca9905f6365 Mon Sep 17 00:00:00 2001 From: Jungwook Park Date: Mon, 30 Oct 2023 21:46:21 +0000 Subject: [PATCH 085/144] [mlir][python] Register LLVM translations in the RegisterEverything for python (#70428) Added missing register_translations in python to replicate the same in the C-API Cleaned up the current calls to register passes where the other calls are already embedded in the mlirRegisterAllPasses. 
The issue was found here: https://discourse.llvm.org/t/opencl-example/74187 --- .../Bindings/Python/RegisterEverything.cpp | 9 +-- mlir/python/mlir/_mlir_libs/__init__.py | 8 ++- .../dialects/{gpu.py => gpu/dialect.py} | 0 .../dialects/gpu/module-to-binary-nvvm.py | 64 +++++++++++++++++++ .../dialects/gpu/module-to-binary-rocdl.py | 64 +++++++++++++++++++ 5 files changed, 138 insertions(+), 7 deletions(-) rename mlir/test/python/dialects/{gpu.py => gpu/dialect.py} (100%) create mode 100644 mlir/test/python/dialects/gpu/module-to-binary-nvvm.py create mode 100644 mlir/test/python/dialects/gpu/module-to-binary-rocdl.py diff --git a/mlir/lib/Bindings/Python/RegisterEverything.cpp b/mlir/lib/Bindings/Python/RegisterEverything.cpp index fed5c36a625bff..6b2f6b0a6a3b86 100644 --- a/mlir/lib/Bindings/Python/RegisterEverything.cpp +++ b/mlir/lib/Bindings/Python/RegisterEverything.cpp @@ -7,20 +7,17 @@ //===----------------------------------------------------------------------===// #include "mlir-c/RegisterEverything.h" -#include "mlir-c/Conversion.h" -#include "mlir-c/Transforms.h" - #include "mlir/Bindings/Python/PybindAdaptors.h" PYBIND11_MODULE(_mlirRegisterEverything, m) { - m.doc() = "MLIR All Upstream Dialects and Passes Registration"; + m.doc() = "MLIR All Upstream Dialects, Translations and Passes Registration"; m.def("register_dialects", [](MlirDialectRegistry registry) { mlirRegisterAllDialects(registry); }); + m.def("register_llvm_translations", + [](MlirContext context) { mlirRegisterAllLLVMTranslations(context); }); // Register all passes on load. mlirRegisterAllPasses(); - mlirRegisterConversionPasses(); - mlirRegisterTransformsPasses(); } diff --git a/mlir/python/mlir/_mlir_libs/__init__.py b/mlir/python/mlir/_mlir_libs/__init__.py index 03fcb10130c3ae..71c074bc955e8c 100644 --- a/mlir/python/mlir/_mlir_libs/__init__.py +++ b/mlir/python/mlir/_mlir_libs/__init__.py @@ -83,7 +83,8 @@ def process_initializer_module(module_name): # If _mlirRegisterEverything is built, then include it as an initializer # module. - process_initializer_module("_mlirRegisterEverything") + if process_initializer_module("_mlirRegisterEverything"): + init_module = importlib.import_module(f"._mlirRegisterEverything", __name__) # Load all _site_initialize_{i} modules, where 'i' is a number starting # at 0. @@ -102,6 +103,11 @@ def __init__(self, *args, **kwargs): # all dialects. It is being done here in order to preserve existing # behavior.
See: https://github.com/llvm/llvm-project/issues/56037 self.load_all_available_dialects() + if init_module: + logger.debug( + "Registering translations from initializer %r", init_module + ) + init_module.register_llvm_translations(self) ir.Context = Context diff --git a/mlir/test/python/dialects/gpu.py b/mlir/test/python/dialects/gpu/dialect.py similarity index 100% rename from mlir/test/python/dialects/gpu.py rename to mlir/test/python/dialects/gpu/dialect.py diff --git a/mlir/test/python/dialects/gpu/module-to-binary-nvvm.py b/mlir/test/python/dialects/gpu/module-to-binary-nvvm.py new file mode 100644 index 00000000000000..70c08ceb7a6f2d --- /dev/null +++ b/mlir/test/python/dialects/gpu/module-to-binary-nvvm.py @@ -0,0 +1,64 @@ +# REQUIRES: host-supports-nvptx +# RUN: %PYTHON %s | FileCheck %s + +from mlir.ir import * +import mlir.dialects.gpu as gpu +import mlir.dialects.gpu.passes +from mlir.passmanager import * + + +def run(f): + print("\nTEST:", f.__name__) + with Context(), Location.unknown(): + f() + return f + + +# CHECK-LABEL: testGPUToLLVMBin +@run +def testGPUToLLVMBin(): + with Context(): + module = Module.parse( + r""" +module attributes {gpu.container_module} { + gpu.module @kernel_module1 [#nvvm.target] { + llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr, + %arg2: !llvm.ptr, %arg3: i64, %arg4: i64, + %arg5: i64) attributes {gpu.kernel} { + llvm.return + } + } +} + """ + ) + pm = PassManager("any") + pm.add("gpu-module-to-binary{format=llvm}") + pm.run(module.operation) + print(module) + # CHECK-LABEL:gpu.binary @kernel_module1 + # CHECK:[#gpu.object<#nvvm.target, offload = "{{.*}}">] + + +# CHECK-LABEL: testGPUToASMBin +@run +def testGPUToASMBin(): + with Context(): + module = Module.parse( + r""" +module attributes {gpu.container_module} { + gpu.module @kernel_module2 [#nvvm.target, #nvvm.target] { + llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr, + %arg2: !llvm.ptr, %arg3: i64, %arg4: i64, + %arg5: i64) attributes {gpu.kernel} { + llvm.return + } + } +} + """ + ) + pm = PassManager("any") + pm.add("gpu-module-to-binary{format=isa}") + pm.run(module.operation) + print(module) + # CHECK-LABEL:gpu.binary @kernel_module2 + # CHECK:[#gpu.object<#nvvm.target, properties = {O = 2 : i32}, assembly = "{{.*}}">, #gpu.object<#nvvm.target, properties = {O = 2 : i32}, assembly = "{{.*}}">] diff --git a/mlir/test/python/dialects/gpu/module-to-binary-rocdl.py b/mlir/test/python/dialects/gpu/module-to-binary-rocdl.py new file mode 100644 index 00000000000000..fad088cbd6d893 --- /dev/null +++ b/mlir/test/python/dialects/gpu/module-to-binary-rocdl.py @@ -0,0 +1,64 @@ +# REQUIRES: host-supports-amdgpu +# RUN: %PYTHON %s | FileCheck %s + +from mlir.ir import * +import mlir.dialects.gpu as gpu +import mlir.dialects.gpu.passes +from mlir.passmanager import * + + +def run(f): + print("\nTEST:", f.__name__) + with Context(), Location.unknown(): + f() + return f + + +# CHECK-LABEL: testGPUToLLVMBin +@run +def testGPUToLLVMBin(): + with Context(): + module = Module.parse( + r""" +module attributes {gpu.container_module} { + gpu.module @kernel_module1 [#rocdl.target] { + llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr, + %arg2: !llvm.ptr, %arg3: i64, %arg4: i64, + %arg5: i64) attributes {gpu.kernel} { + llvm.return + } + } +} + """ + ) + pm = PassManager("any") + pm.add("gpu-module-to-binary{format=llvm}") + pm.run(module.operation) + print(module) + # CHECK-LABEL:gpu.binary @kernel_module1 + # CHECK:[#gpu.object<#rocdl.target, offload = "{{.*}}">] + + +# CHECK-LABEL: testGPUToASMBin +@run +def 
testGPUToASMBin(): with Context(): module = Module.parse( r""" module attributes {gpu.container_module} { gpu.module @kernel_module2 [#rocdl.target, #rocdl.target] { llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr, %arg2: !llvm.ptr, %arg3: i64, %arg4: i64, %arg5: i64) attributes {gpu.kernel} { llvm.return } } } """ ) pm = PassManager("any") pm.add("gpu-module-to-binary{format=isa}") pm.run(module.operation) print(module) # CHECK-LABEL:gpu.binary @kernel_module2 # CHECK:[#gpu.object<#rocdl.target, assembly = "{{.*}}">, #gpu.object<#rocdl.target, assembly = "{{.*}}">] From ba1349fc31295a3670b34c189838a133e18c0bed Mon Sep 17 00:00:00 2001 From: Danila Malyutin Date: Mon, 30 Oct 2023 14:50:57 -0700 Subject: [PATCH 086/144] [SCEV] Fix "quick and dirty" difference that could lead to assert (#70688) The old algorithm would remove all operands matching %step SCEV when it intended to only remove a single one. This led to an assert when SCEVAddExpr was of the form %step + %step, and to potential miscompiles in similar cases. Such SCEVs could be created when construction reached depth thresholds. Fixes #70348 --- llvm/lib/Analysis/ScalarEvolution.cpp | 13 +++++++---- .../Transforms/LoopStrengthReduce/pr70348.ll | 23 +++++++++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/LoopStrengthReduce/pr70348.ll diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 2368003177e741..f13e508a6c454b 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1335,11 +1335,14 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV // subtraction is expensive. For this purpose, perform a quick and dirty - // difference, by checking for Step in the operand list. - SmallVector DiffOps; - for (const SCEV *Op : SA->operands()) - if (Op != Step) - DiffOps.push_back(Op); + // difference, by checking for Step in the operand list. Note, that + // SA might have repeated ops, like %a + %a + ..., so only remove one. + SmallVector DiffOps(SA->operands()); + for (auto It = DiffOps.begin(); It != DiffOps.end(); ++It) + if (*It == Step) { + DiffOps.erase(It); + break; + } if (DiffOps.size() == SA->getNumOperands()) return nullptr; diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr70348.ll b/llvm/test/Transforms/LoopStrengthReduce/pr70348.ll new file mode 100644 index 00000000000000..35b48a03a14e43 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/pr70348.ll @@ -0,0 +1,23 @@ +; RUN: opt -S -passes=loop-reduce -scalar-evolution-max-arith-depth=0 %s | FileCheck %s +; +; Make sure we don't trigger an assertion in SCEV here.
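+; (Editorial sketch, not in the original test: with the arith depth capped at
+; 0, SCEV can build adds with repeated operands such as (%step + %step); the
+; old "quick and dirty" difference removed every copy of %step instead of
+; exactly one, which is the assert this test guards against.)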
+ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" +target triple = "x86_64-unknown-linux-gnu" + +define void @test(i32 %phi) { +; CHECK-LABEL: test +bb: + br label %bb6 + +bb6: ; preds = %bb6, %bb + %phi7 = phi i32 [ 1, %bb ], [ %add44, %bb6 ] + %mul13 = mul i32 %phi7, %phi + %mul16 = mul i32 %mul13, 0 + %add44 = add i32 %phi7, 1 + br i1 true, label %bb51, label %bb6 + +bb51: ; preds = %bb6 + unreachable +} + From c1183399a8205f83a418f20889776589b3b98d53 Mon Sep 17 00:00:00 2001 From: Youngsuk Kim Date: Mon, 30 Oct 2023 16:51:49 -0500 Subject: [PATCH 087/144] [clang] Remove no-op ptr-to-ptr bitcasts (NFC) --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 4 ++-- clang/lib/CodeGen/CoverageMappingGen.cpp | 4 +--- clang/lib/CodeGen/ItaniumCXXABI.cpp | 1 - 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c1be7c2d032158..bcd67b7205c7dd 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1809,7 +1809,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, /*IsInitializer=*/true); CtorCGF.FinishFunction(); Ctor = Fn; - ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); + ID = Fn; } else { Ctor = new llvm::GlobalVariable( CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, @@ -1858,7 +1858,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); DtorCGF.FinishFunction(); Dtor = Fn; - ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); + ID = Fn; } else { Dtor = new llvm::GlobalVariable( CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 76ed10091b025b..d77aa4dd78a06b 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -1728,13 +1728,11 @@ void CoverageMappingModuleGen::emitFunctionMappingRecord( void CoverageMappingModuleGen::addFunctionMappingRecord( llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash, const std::string &CoverageMapping, bool IsUsed) { - llvm::LLVMContext &Ctx = CGM.getLLVMContext(); const uint64_t NameHash = llvm::IndexedInstrProf::ComputeHash(NameValue); FunctionRecords.push_back({NameHash, FuncHash, CoverageMapping, IsUsed}); if (!IsUsed) - FunctionNames.push_back( - llvm::ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx))); + FunctionNames.push_back(NamePtr); if (CGM.getCodeGenOpts().DumpCoverageMapping) { // Dump the coverage mapping data for this function by decoding the diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 0c89871420bdd3..7877235a63356f 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1324,7 +1324,6 @@ void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) { if (!Record->hasTrivialDestructor()) { CXXDestructorDecl *DtorD = Record->getDestructor(); Dtor = CGM.getAddrOfCXXStructor(GlobalDecl(DtorD, Dtor_Complete)); - Dtor = llvm::ConstantExpr::getBitCast(Dtor, CGM.Int8PtrTy); } } if (!Dtor) Dtor = llvm::Constant::getNullValue(CGM.Int8PtrTy); From a41b149f481e2bcba24e81f208a1938247f040e0 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 30 Oct 2023 14:59:58 -0700 Subject: [PATCH 088/144] [MachineInstr] add insert method for variadic instructions (#67699) As alluded 
to in #20571, it would be nice if we could mutate operand lists of MachineInstr's more safely. Add an insert method that together with removeOperand allows for easier splicing of operands. Splitting this patch off early to get feedback; I need to either: - mutate an INLINEASM{_BR} MachineInstr's MachineOperands from being registers (physical or virtual) to memory (MachineOperandType::MO_FrameIndex). These are not 1:1 operand replacements, but N:M operand replacements, i.e. we need to update 2 MachineOperands in the middle of the operand list to 5 (at least for x86_64). - copy, modify, write a new MachineInstr which has its relevant operands replaced. Either approach is hazarded by existing references to either the operands being moved, or the instruction being removed+replaced. For my purposes in regalloc, either seems to work for me, so hopefully reviewers can help me determine which approach is preferable. One question I had while looking at this was: "why does MachineInstr have BOTH a NumOperands member AND a MCInstrDesc member that itself has a NumOperands member? How many operands can a MachineInstr have? Do I need to update BOTH (keeping them in sync)?" FWICT, only "variadic" MachineInstrs have MCInstrDesc with NumOperands (of the MCInstrDesc) set to zero. If the MCInstrDesc's NumOperands is non-zero, then the NumOperands on the MachineInstr itself cannot exceed this value (IIUC) else an assert will be triggered. For most non-pseudo instructions (or at least non-variadic instructions), insert is less likely to be useful. To run the newly added unittest: $ pushd llvm/build; ninja CodeGenTests; popd $ ./llvm/build/unittests/CodeGen/CodeGenTests \ --gtest_filter=MachineInstrTest.SpliceOperands This is meant to mirror `MCInst::insert`. --- llvm/include/llvm/CodeGen/MachineInstr.h | 3 + llvm/lib/CodeGen/MachineInstr.cpp | 45 +++++++++++ llvm/unittests/CodeGen/MachineInstrTest.cpp | 83 +++++++++++++++++++++ 3 files changed, 131 insertions(+) diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 0b9ad764af265d..4877f43e8578d1 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1812,6 +1812,9 @@ class MachineInstr /// preferred. void addOperand(const MachineOperand &Op); + /// Inserts Ops BEFORE It. Can untie/retie tied operands. + void insert(mop_iterator InsertBefore, ArrayRef Ops); + /// Replace the instruction descriptor (thus opcode) of /// the current instruction with a new one. void setDesc(const MCInstrDesc &TID); diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 448725893bde02..048563cc2bcc4e 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -2481,3 +2481,48 @@ MachineInstr::getFirst5RegLLTs() const { Reg2, getRegInfo()->getType(Reg2), Reg3, getRegInfo()->getType(Reg3), Reg4, getRegInfo()->getType(Reg4)); } + +void MachineInstr::insert(mop_iterator InsertBefore, + ArrayRef Ops) { + assert(InsertBefore != nullptr && "invalid iterator"); + assert(InsertBefore->getParent() == this && + "iterator points to operand of other inst"); + if (Ops.empty()) + return; + + // Do one pass to untie operands.
+ SmallDenseMap TiedOpIndices; + for (const MachineOperand &MO : operands()) { + if (MO.isReg() && MO.isTied()) { + unsigned OpNo = getOperandNo(&MO); + unsigned TiedTo = findTiedOperandIdx(OpNo); + TiedOpIndices[OpNo] = TiedTo; + untieRegOperand(OpNo); + } + } + + unsigned OpIdx = getOperandNo(InsertBefore); + unsigned NumOperands = getNumOperands(); + unsigned OpsToMove = NumOperands - OpIdx; + + SmallVector MovingOps; + MovingOps.reserve(OpsToMove); + + for (unsigned I = 0; I < OpsToMove; ++I) { + MovingOps.emplace_back(getOperand(OpIdx)); + removeOperand(OpIdx); + } + for (const MachineOperand &MO : Ops) + addOperand(MO); + for (const MachineOperand &OpMoved : MovingOps) + addOperand(OpMoved); + + // Re-tie operands. + for (auto [Tie1, Tie2] : TiedOpIndices) { + if (Tie1 >= OpIdx) + Tie1 += Ops.size(); + if (Tie2 >= OpIdx) + Tie2 += Ops.size(); + tieOperands(Tie1, Tie2); + } +} diff --git a/llvm/unittests/CodeGen/MachineInstrTest.cpp b/llvm/unittests/CodeGen/MachineInstrTest.cpp index be409a56adb1af..0841cd3a7fb04f 100644 --- a/llvm/unittests/CodeGen/MachineInstrTest.cpp +++ b/llvm/unittests/CodeGen/MachineInstrTest.cpp @@ -478,4 +478,87 @@ TEST(MachineInstrBuilder, BuildMI) { static_assert(std::is_trivially_copyable_v, "trivially copyable"); +TEST(MachineInstrTest, SpliceOperands) { + LLVMContext Ctx; + Module Mod("Module", Ctx); + std::unique_ptr MF = createMachineFunction(Ctx, Mod); + MachineBasicBlock *MBB = MF->CreateMachineBasicBlock(); + MCInstrDesc MCID = {TargetOpcode::INLINEASM, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + (1ULL << MCID::Pseudo) | (1ULL << MCID::Variadic), + 0}; + MachineInstr *MI = MF->CreateMachineInstr(MCID, DebugLoc()); + MBB->insert(MBB->begin(), MI); + MI->addOperand(MachineOperand::CreateImm(0)); + MI->addOperand(MachineOperand::CreateImm(1)); + MI->addOperand(MachineOperand::CreateImm(2)); + MI->addOperand(MachineOperand::CreateImm(3)); + MI->addOperand(MachineOperand::CreateImm(4)); + + MI->removeOperand(1); + EXPECT_EQ(MI->getOperand(1).getImm(), MachineOperand::CreateImm(2).getImm()); + EXPECT_EQ(MI->getNumOperands(), 4U); + + MachineOperand Ops[] = { + MachineOperand::CreateImm(42), MachineOperand::CreateImm(1024), + MachineOperand::CreateImm(2048), MachineOperand::CreateImm(4096), + MachineOperand::CreateImm(8192), + }; + auto *It = MI->operands_begin(); + ++It; + MI->insert(It, Ops); + + EXPECT_EQ(MI->getNumOperands(), 9U); + EXPECT_EQ(MI->getOperand(0).getImm(), MachineOperand::CreateImm(0).getImm()); + EXPECT_EQ(MI->getOperand(1).getImm(), MachineOperand::CreateImm(42).getImm()); + EXPECT_EQ(MI->getOperand(2).getImm(), + MachineOperand::CreateImm(1024).getImm()); + EXPECT_EQ(MI->getOperand(3).getImm(), + MachineOperand::CreateImm(2048).getImm()); + EXPECT_EQ(MI->getOperand(4).getImm(), + MachineOperand::CreateImm(4096).getImm()); + EXPECT_EQ(MI->getOperand(5).getImm(), + MachineOperand::CreateImm(8192).getImm()); + EXPECT_EQ(MI->getOperand(6).getImm(), MachineOperand::CreateImm(2).getImm()); + EXPECT_EQ(MI->getOperand(7).getImm(), MachineOperand::CreateImm(3).getImm()); + EXPECT_EQ(MI->getOperand(8).getImm(), MachineOperand::CreateImm(4).getImm()); + + // test tied operands + MCRegisterClass MRC{0, 0, 0, 0, 0, 0, 0, 0, /*Allocatable=*/true}; + TargetRegisterClass RC{&MRC, 0, 0, {}, 0, 0, 0, 0, 0, 0, 0}; + // MachineRegisterInfo will be very upset if these registers aren't + // allocatable. 
+ assert(RC.isAllocatable() && "unusable TargetRegisterClass"); + MachineRegisterInfo &MRI = MF->getRegInfo(); + Register A = MRI.createVirtualRegister(&RC); + Register B = MRI.createVirtualRegister(&RC); + MI->getOperand(0).ChangeToRegister(A, /*isDef=*/true); + MI->getOperand(1).ChangeToRegister(B, /*isDef=*/false); + MI->tieOperands(0, 1); + EXPECT_TRUE(MI->getOperand(0).isTied()); + EXPECT_TRUE(MI->getOperand(1).isTied()); + EXPECT_EQ(MI->findTiedOperandIdx(0), 1U); + EXPECT_EQ(MI->findTiedOperandIdx(1), 0U); + MI->insert(&MI->getOperand(1), {MachineOperand::CreateImm(7)}); + EXPECT_TRUE(MI->getOperand(0).isTied()); + EXPECT_TRUE(MI->getOperand(1).isImm()); + EXPECT_TRUE(MI->getOperand(2).isTied()); + EXPECT_EQ(MI->findTiedOperandIdx(0), 2U); + EXPECT_EQ(MI->findTiedOperandIdx(2), 0U); + EXPECT_EQ(MI->getOperand(0).getReg(), A); + EXPECT_EQ(MI->getOperand(2).getReg(), B); + + // bad inputs + EXPECT_EQ(MI->getNumOperands(), 10U); + MI->insert(MI->operands_begin(), {}); + EXPECT_EQ(MI->getNumOperands(), 10U); +} + } // end namespace From 91cdd7d615da38a1f025646f526c2fce265a37e2 Mon Sep 17 00:00:00 2001 From: Kirill Stoimenov <87100199+kstoimenov@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:01:21 -0700 Subject: [PATCH 089/144] [HWASAN] Enable memcpy, memmove and memset interceptors (#70387) --- .../lib/hwasan/hwasan_interceptors.cpp | 27 +--------------- .../lib/hwasan/hwasan_platform_interceptors.h | 12 +++---- compiler-rt/test/hwasan/TestCases/bcmp.cpp | 15 ++++++--- compiler-rt/test/hwasan/TestCases/memcmp.cpp | 15 ++++++--- compiler-rt/test/hwasan/TestCases/memcpy.cpp | 32 +++++++++++++++++++ compiler-rt/test/hwasan/TestCases/memmove.cpp | 32 +++++++++++++++++++ compiler-rt/test/hwasan/TestCases/memset.cpp | 32 +++++++++++++++++++ 7 files changed, 125 insertions(+), 40 deletions(-) create mode 100644 compiler-rt/test/hwasan/TestCases/memcpy.cpp create mode 100644 compiler-rt/test/hwasan/TestCases/memmove.cpp create mode 100644 compiler-rt/test/hwasan/TestCases/memset.cpp diff --git a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp index 0889831373a803..5171f035f97f76 100644 --- a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp +++ b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp @@ -90,8 +90,7 @@ struct HWAsanInterceptorContext { # include "sanitizer_common/sanitizer_syscalls_netbsd.inc" # define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \ - do { \ - } while (false) + HWASAN_WRITE_RANGE(ctx, ptr, size) # define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) \ HWASAN_READ_RANGE(ctx, ptr, size) @@ -147,30 +146,6 @@ struct HWAsanInterceptorContext { (void)(name); \ } while (false) -# define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size) \ - do { \ - (void)(ctx); \ - (void)(to); \ - (void)(from); \ - (void)(size); \ - } while (false) - -# define COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size) \ - do { \ - (void)(ctx); \ - (void)(to); \ - (void)(from); \ - (void)(size); \ - } while (false) - -# define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size) \ - do { \ - (void)(ctx); \ - (void)(block); \ - (void)(c); \ - (void)(size); \ - } while (false) - # define COMMON_INTERCEPTOR_STRERROR() \ do { \ } while (false) diff --git a/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h b/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h index 86d26b5ac12d4a..d92b5105219427 100644 --- a/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h +++ b/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h @@ -56,14 +56,14 @@ #undef 
SANITIZER_INTERCEPT_STRCASECMP
 #define SANITIZER_INTERCEPT_STRCASECMP 0
 
-#undef SANITIZER_INTERCEPT_MEMSET
-#define SANITIZER_INTERCEPT_MEMSET 0
+// #undef SANITIZER_INTERCEPT_MEMSET
+// #define SANITIZER_INTERCEPT_MEMSET 0
 
-#undef SANITIZER_INTERCEPT_MEMMOVE
-#define SANITIZER_INTERCEPT_MEMMOVE 0
+// #undef SANITIZER_INTERCEPT_MEMMOVE
+// #define SANITIZER_INTERCEPT_MEMMOVE 0
 
-#undef SANITIZER_INTERCEPT_MEMCPY
-#define SANITIZER_INTERCEPT_MEMCPY 0
+// #undef SANITIZER_INTERCEPT_MEMCPY
+// #define SANITIZER_INTERCEPT_MEMCPY 0
 
 // #undef SANITIZER_INTERCEPT_MEMCMP
 // #define SANITIZER_INTERCEPT_MEMCMP 0
diff --git a/compiler-rt/test/hwasan/TestCases/bcmp.cpp b/compiler-rt/test/hwasan/TestCases/bcmp.cpp
index a83147b0f32052..9b21bba56b1bee 100644
--- a/compiler-rt/test/hwasan/TestCases/bcmp.cpp
+++ b/compiler-rt/test/hwasan/TestCases/bcmp.cpp
@@ -4,11 +4,17 @@
 // RUN: %clangxx_hwasan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s
 // REQUIRES: !android
 
+#include <assert.h>
 #include <sanitizer/hwasan_interface.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 
+__attribute__((no_sanitize("hwaddress"))) void
+ForceCallInterceptor(void *p, const void *a, size_t size) {
+  assert(bcmp(p, a, size) == 0);
+}
+
 int main(int argc, char **argv) {
   __hwasan_enable_allocator_tagging();
   char a[] = {static_cast<char>(argc), 2, 3, 4};
@@ -16,13 +22,14 @@ int main(int argc, char **argv) {
   char *p = (char *)malloc(size);
   memcpy(p, a, size);
   free(p);
-  return bcmp(p, a, size);
+  ForceCallInterceptor(p, a, size);
+  return 0;
   // CHECK: HWAddressSanitizer: tag-mismatch on address
   // CHECK: READ of size 4
-  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-3]]
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-4]]
   // CHECK: Cause: use-after-free
   // CHECK: freed by thread
-  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-7]]
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-8]]
   // CHECK: previously allocated by thread
-  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-11]]
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}bcmp.cpp:[[@LINE-12]]
 }
diff --git a/compiler-rt/test/hwasan/TestCases/memcmp.cpp b/compiler-rt/test/hwasan/TestCases/memcmp.cpp
index 5f8a93f62a44a1..31915527c27fdd 100644
--- a/compiler-rt/test/hwasan/TestCases/memcmp.cpp
+++ b/compiler-rt/test/hwasan/TestCases/memcmp.cpp
@@ -3,11 +3,17 @@
 // RUN: %clangxx_hwasan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_hwasan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s
 
+#include <assert.h>
 #include <sanitizer/hwasan_interface.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 
+__attribute__((no_sanitize("hwaddress"))) void
+ForceCallInterceptor(void *p, const void *a, size_t size) {
+  assert(memcmp(p, a, size) == 0);
+}
+
 int main(int argc, char **argv) {
   __hwasan_enable_allocator_tagging();
   char a[] = {static_cast<char>(argc), 2, 3, 4};
@@ -15,13 +21,14 @@ int main(int argc, char **argv) {
   char *p = (char *)malloc(size);
   memcpy(p, a, size);
   free(p);
-  return memcmp(p, a, size);
+  ForceCallInterceptor(p, a, size);
+  return 0;
   // CHECK: HWAddressSanitizer: tag-mismatch on address
   // CHECK: READ of size 4
-  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcmp.cpp:[[@LINE-3]]
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcmp.cpp:[[@LINE-4]]
   // CHECK: Cause: use-after-free
   // CHECK: freed by thread
-  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcmp.cpp:[[@LINE-7]]
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcmp.cpp:[[@LINE-8]]
   // 
CHECK: previously allocated by thread
-  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcmp.cpp:[[@LINE-11]]
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcmp.cpp:[[@LINE-12]]
 }
diff --git a/compiler-rt/test/hwasan/TestCases/memcpy.cpp b/compiler-rt/test/hwasan/TestCases/memcpy.cpp
new file mode 100644
index 00000000000000..830449488fec49
--- /dev/null
+++ b/compiler-rt/test/hwasan/TestCases/memcpy.cpp
@@ -0,0 +1,32 @@
+// RUN: %clangxx_hwasan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_hwasan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_hwasan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_hwasan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+#include <sanitizer/hwasan_interface.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+__attribute__((no_sanitize("hwaddress"))) void
+ForceCallInterceptor(void *p, const void *a, size_t size) {
+  memcpy(p, a, size);
+}
+
+int main(int argc, char **argv) {
+  __hwasan_enable_allocator_tagging();
+  char a[] = {static_cast<char>(argc), 2, 3, 4};
+  int size = sizeof(a);
+  char *volatile p = (char *)malloc(size);
+  free(p);
+  ForceCallInterceptor(p, a, size);
+  return 0;
+  // CHECK: HWAddressSanitizer: tag-mismatch on address
+  // CHECK: WRITE of size 4
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcpy.cpp:[[@LINE-4]]
+  // CHECK: Cause: use-after-free
+  // CHECK: freed by thread
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcpy.cpp:[[@LINE-8]]
+  // CHECK: previously allocated by thread
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memcpy.cpp:[[@LINE-11]]
+}
diff --git a/compiler-rt/test/hwasan/TestCases/memmove.cpp b/compiler-rt/test/hwasan/TestCases/memmove.cpp
new file mode 100644
index 00000000000000..40dc3deeb39350
--- /dev/null
+++ b/compiler-rt/test/hwasan/TestCases/memmove.cpp
@@ -0,0 +1,32 @@
+// RUN: %clangxx_hwasan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_hwasan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_hwasan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_hwasan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+#include <sanitizer/hwasan_interface.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+__attribute__((no_sanitize("hwaddress"))) void
+ForceCallInterceptor(void *p, const void *a, size_t size) {
+  memmove(p, a, size);
+}
+
+int main(int argc, char **argv) {
+  __hwasan_enable_allocator_tagging();
+  char a[] = {static_cast<char>(argc), 2, 3, 4};
+  int size = sizeof(a);
+  char *volatile p = (char *)malloc(size);
+  free(p);
+  ForceCallInterceptor(p, a, size);
+  return 0;
+  // CHECK: HWAddressSanitizer: tag-mismatch on address
+  // CHECK: WRITE of size 4
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memmove.cpp:[[@LINE-4]]
+  // CHECK: Cause: use-after-free
+  // CHECK: freed by thread
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memmove.cpp:[[@LINE-8]]
+  // CHECK: previously allocated by thread
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memmove.cpp:[[@LINE-11]]
+}
diff --git a/compiler-rt/test/hwasan/TestCases/memset.cpp b/compiler-rt/test/hwasan/TestCases/memset.cpp
new file mode 100644
index 00000000000000..ae31a3bfe9cdaa
--- /dev/null
+++ b/compiler-rt/test/hwasan/TestCases/memset.cpp
@@ -0,0 +1,32 @@
+// RUN: %clangxx_hwasan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_hwasan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_hwasan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_hwasan -O3 %s -o %t 
&& not %run %t 2>&1 | FileCheck %s
+
+#include <sanitizer/hwasan_interface.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+__attribute__((no_sanitize("hwaddress"))) void
+ForceCallInterceptor(void *p, int c, size_t size) {
+  memset(p, c, size) == nullptr;
+}
+
+int main(int argc, char **argv) {
+  __hwasan_enable_allocator_tagging();
+  char a[] = {static_cast<char>(argc), 2, 3, 4};
+  int size = sizeof(a);
+  char *volatile p = (char *)malloc(size);
+  free(p);
+  ForceCallInterceptor(p, 0, size);
+  return 0;
+  // CHECK: HWAddressSanitizer: tag-mismatch on address
+  // CHECK: WRITE of size 4
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memset.cpp:[[@LINE-4]]
+  // CHECK: Cause: use-after-free
+  // CHECK: freed by thread
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memset.cpp:[[@LINE-8]]
+  // CHECK: previously allocated by thread
+  // CHECK: #{{[[:digit:]]+}} 0x{{[[:xdigit:]]+}} in main {{.*}}memset.cpp:[[@LINE-11]]
+}

From b1c59b516cbbbb17ab8ceea0a9046924d1683583 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Mon, 30 Oct 2023 15:07:22 -0700
Subject: [PATCH 090/144] [SCCP] Infer nneg on zext when forming from
 non-negative sext. (#70730)

Builds on #67982 which recently introduced the nneg flag on a zext
instruction.
---
 llvm/lib/Transforms/Utils/SCCPSolver.cpp       | 1 +
 llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll | 4 ++--
 llvm/test/Transforms/SCCP/ip-ranges-casts.ll   | 4 ++--
 llvm/test/Transforms/SCCP/ip-ranges-sext.ll    | 8 ++++----
 llvm/test/Transforms/SCCP/ranges-sext.ll       | 8 ++++----
 llvm/test/Transforms/SCCP/widening.ll          | 4 ++--
 6 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 4b96b02ee2ecd6..ea8425c5d5ddc1 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -171,6 +171,7 @@ static bool replaceSignedInst(SCCPSolver &Solver,
     if (InsertedValues.count(Op0) || !isNonNegative(Op0))
       return false;
     NewInst = new ZExtInst(Op0, Inst.getType(), "", &Inst);
+    NewInst->setNonNeg();
     break;
   }
   case Instruction::AShr: {
diff --git a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
index 97b471d9854102..b8f5d5dba0c4b2 100644
--- a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
+++ b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
@@ -124,7 +124,7 @@ define i16 @sge_with_sext_to_zext_conversion(i8 %a) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[A:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[SEXT:%.*]] = zext i8 [[A]] to i16
+; CHECK-NEXT:    [[SEXT:%.*]] = zext nneg i8 [[A]] to i16
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add i16 [[SEXT]], 1
 ; CHECK-NEXT:    [[ADD_2:%.*]] = add i16 [[SEXT]], -128
 ; CHECK-NEXT:    [[ADD_3:%.*]] = add i16 [[SEXT]], -127
@@ -219,7 +219,7 @@ define i16 @test_add_in_different_block(i1 %c, i8 %a) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[A:%.*]], 0
 ; CHECK-NEXT:    [[COND4:%.*]] = select i1 [[CMP]], i8 1, i8 0
-; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[COND4]] to i16
+; CHECK-NEXT:    [[CONV:%.*]] = zext nneg i8 [[COND4]] to i16
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[ADD:%.*]] = add i16 1, [[CONV]]
diff --git a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll
index 980d7d87abc760..6fbcb5d166dce7 100644
--- a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll
+++ b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll
@@ -112,7 +112,7 @@ define i1 
@caller.zext() { ; x = [100, 301) define internal i1 @f.sext(i32 %x, i32 %y) { ; CHECK-LABEL: @f.sext( -; CHECK-NEXT: [[T_1:%.*]] = zext i32 [[X:%.*]] to i64 +; CHECK-NEXT: [[T_1:%.*]] = zext nneg i32 [[X:%.*]] to i64 ; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[T_1]], 299 ; CHECK-NEXT: [[C_4:%.*]] = icmp slt i64 [[T_1]], 101 ; CHECK-NEXT: [[RES_1:%.*]] = add nuw nsw i1 false, [[C_2]] @@ -318,7 +318,7 @@ entry: define internal i64 @f.sext_to_zext(i32 %t) { ; CHECK-LABEL: @f.sext_to_zext( -; CHECK-NEXT: [[A:%.*]] = zext i32 [[T:%.*]] to i64 +; CHECK-NEXT: [[A:%.*]] = zext nneg i32 [[T:%.*]] to i64 ; CHECK-NEXT: ret i64 [[A]] ; %a = sext i32 %t to i64 diff --git a/llvm/test/Transforms/SCCP/ip-ranges-sext.ll b/llvm/test/Transforms/SCCP/ip-ranges-sext.ll index 6fa74b379f4c81..10f5fa3b07eac2 100644 --- a/llvm/test/Transforms/SCCP/ip-ranges-sext.ll +++ b/llvm/test/Transforms/SCCP/ip-ranges-sext.ll @@ -6,7 +6,7 @@ define i64 @test1(i32 %x) { ; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[X:%.*]], 0 ; CHECK-NEXT: br i1 [[C]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[EXT_1:%.*]] = zext i32 [[X]] to i64 +; CHECK-NEXT: [[EXT_1:%.*]] = zext nneg i32 [[X]] to i64 ; CHECK-NEXT: ret i64 [[EXT_1]] ; CHECK: false: ; CHECK-NEXT: [[EXT_2:%.*]] = sext i32 [[X]] to i64 @@ -29,7 +29,7 @@ define i64 @test2(i32 %x) { ; CHECK-NEXT: [[C:%.*]] = icmp sge i32 [[X:%.*]], 0 ; CHECK-NEXT: br i1 [[C]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[EXT_1:%.*]] = zext i32 [[X]] to i64 +; CHECK-NEXT: [[EXT_1:%.*]] = zext nneg i32 [[X]] to i64 ; CHECK-NEXT: ret i64 [[EXT_1]] ; CHECK: false: ; CHECK-NEXT: [[EXT_2:%.*]] = sext i32 [[X]] to i64 @@ -105,7 +105,7 @@ exit: define i64 @test5(i32 %x) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: [[P:%.*]] = and i32 [[X:%.*]], 15 -; CHECK-NEXT: [[EXT:%.*]] = zext i32 [[P]] to i64 +; CHECK-NEXT: [[EXT:%.*]] = zext nneg i32 [[P]] to i64 ; CHECK-NEXT: ret i64 [[EXT]] ; %p = and i32 %x, 15 @@ -126,7 +126,7 @@ define i64 @test6(i32 %x) { define i64 @test7(i16 %x) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[P:%.*]] = and i16 [[X:%.*]], 15 -; CHECK-NEXT: [[EXT_1:%.*]] = zext i16 [[P]] to i32 +; CHECK-NEXT: [[EXT_1:%.*]] = zext nneg i16 [[P]] to i32 ; CHECK-NEXT: [[EXT_2:%.*]] = sext i32 [[EXT_1]] to i64 ; CHECK-NEXT: ret i64 [[EXT_2]] ; diff --git a/llvm/test/Transforms/SCCP/ranges-sext.ll b/llvm/test/Transforms/SCCP/ranges-sext.ll index bd924a73930155..0661b8605137e1 100644 --- a/llvm/test/Transforms/SCCP/ranges-sext.ll +++ b/llvm/test/Transforms/SCCP/ranges-sext.ll @@ -68,8 +68,8 @@ exit: define i64 @test2(i32 %x) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: [[P:%.*]] = and i32 [[X:%.*]], 15 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[P]] to i64 -; CHECK-NEXT: ret i64 [[TMP1]] +; CHECK-NEXT: [[EXT:%.*]] = zext nneg i32 [[P]] to i64 +; CHECK-NEXT: ret i64 [[EXT]] ; %p = and i32 %x, 15 %ext = sext i32 %p to i64 @@ -87,8 +87,8 @@ define i64 @test3(i1 %c.1, i1 %c.2) { ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[TRUE_1]] ], [ 1, [[TRUE_2]] ], [ 3, [[FALSE]] ] -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[P]] to i64 -; CHECK-NEXT: ret i64 [[TMP1]] +; CHECK-NEXT: [[EXT:%.*]] = zext nneg i32 [[P]] to i64 +; CHECK-NEXT: ret i64 [[EXT]] ; br i1 %c.1, label %true.1, label %false diff --git a/llvm/test/Transforms/SCCP/widening.ll b/llvm/test/Transforms/SCCP/widening.ll index f482ed3a4e7f65..2223ca44bccdbb 100644 --- a/llvm/test/Transforms/SCCP/widening.ll +++ b/llvm/test/Transforms/SCCP/widening.ll @@ -450,7 +450,7 @@ define void 
@foo(ptr %arg) {
 ; SCCP-NEXT:    [[TMP7:%.*]] = sub nuw nsw i64 3, [[TMP6]]
 ; SCCP-NEXT:    [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 1
 ; SCCP-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
-; SCCP-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
+; SCCP-NEXT:    [[TMP10:%.*]] = zext nneg i32 [[TMP9]] to i64
 ; SCCP-NEXT:    br label [[BB11:%.*]]
 ; SCCP:       bb11:
 ; SCCP-NEXT:    [[TMP12:%.*]] = phi i64 [ [[TMP10]], [[BB4]] ], [ [[TMP17:%.*]], [[BB18:%.*]] ]
@@ -487,7 +487,7 @@ define void @foo(ptr %arg) {
 ; IPSCCP-NEXT:    [[TMP7:%.*]] = sub nuw nsw i64 3, [[TMP6]]
 ; IPSCCP-NEXT:    [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 1
 ; IPSCCP-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
-; IPSCCP-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
+; IPSCCP-NEXT:    [[TMP10:%.*]] = zext nneg i32 [[TMP9]] to i64
 ; IPSCCP-NEXT:    br label [[BB11:%.*]]
 ; IPSCCP:       bb11:
 ; IPSCCP-NEXT:    [[TMP12:%.*]] = phi i64 [ [[TMP10]], [[BB4]] ], [ [[TMP17:%.*]], [[BB18:%.*]] ]

From f82bee1367a1d612d688790b81c1c146ce99f2ea Mon Sep 17 00:00:00 2001
From: Peiming Liu <36770114+PeimingLiu@users.noreply.github.com>
Date: Mon, 30 Oct 2023 15:22:21 -0700
Subject: [PATCH 091/144] [mlir][sparse] split post-sparsification-rewriting
 into two passes. (#70727)

---
 .../Dialect/SparseTensor/Transforms/Passes.h  | 22 +++++----
 .../Dialect/SparseTensor/Transforms/Passes.td | 23 +++++++---
 .../Transforms/SparseTensorPasses.cpp         | 46 ++++++++++++-------
 .../Transforms/SparseTensorRewriting.cpp      | 14 +++---
 .../SparsificationAndBufferizationPass.cpp    |  5 +-
 mlir/test/Dialect/SparseTensor/codegen.mlir   |  2 +-
 .../test/Dialect/SparseTensor/conversion.mlir |  2 +-
 .../SparseTensor/convert_dense2sparse.mlir    |  2 +-
 .../SparseTensor/convert_sparse2dense.mlir    |  2 +-
 .../SparseTensor/convert_sparse2sparse.mlir   |  2 +-
 .../SparseTensor/rewriting_for_codegen.mlir   |  4 +-
 .../Dialect/SparseTensor/sparse_concat.mlir   |  4 +-
 .../Dialect/SparseTensor/sparse_expand.mlir   |  3 +-
 .../Dialect/SparseTensor/sparse_foreach.mlir  |  2 +-
 .../Dialect/SparseTensor/sparse_pack.mlir     |  2 +-
 .../Dialect/SparseTensor/sparse_reshape.mlir  |  8 ++--
 .../SparseTensor/sparse_tensor_reshape.mlir   |  4 +-
 17 files changed, 92 insertions(+), 55 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
index b1979f032393ba..a8d4d752dff888 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
@@ -114,17 +114,23 @@ void populateStageSparseOperationsPatterns(RewritePatternSet &patterns);
 std::unique_ptr<Pass> createStageSparseOperationsPass();
 
 //===----------------------------------------------------------------------===//
-// The PostSparsificationRewriting pass.
+// The LowerSparseOpsToForeach pass.
 //===----------------------------------------------------------------------===//
 
-void populatePostSparsificationRewriting(RewritePatternSet &patterns,
-                                         bool enableRT, bool enableForeach,
-                                         bool enableConvert);
+void populateLowerSparseOpsToForeachPatterns(RewritePatternSet &patterns,
+                                             bool enableRT, bool enableConvert);
 
-std::unique_ptr<Pass> createPostSparsificationRewritePass();
-std::unique_ptr<Pass>
-createPostSparsificationRewritePass(bool enableRT, bool enableForeach = true,
-                                    bool enableConvert = true);
+std::unique_ptr<Pass> createLowerSparseOpsToForeachPass();
+std::unique_ptr<Pass> createLowerSparseOpsToForeachPass(bool enableRT,
+                                                        bool enableConvert);
+
+//===----------------------------------------------------------------------===//
+// The LowerForeachToSCF pass. 
+//===----------------------------------------------------------------------===//
+
+void populateLowerForeachToSCFPatterns(RewritePatternSet &patterns);
+
+std::unique_ptr<Pass> createLowerForeachToSCFPass();
 
 //===----------------------------------------------------------------------===//
 // The SparseTensorConversion pass.
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
index 99dbd7ab3677e7..995e842289035b 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
@@ -167,13 +167,12 @@ def StageSparseOperations : Pass<"stage-sparse-ops", "func::FuncOp"> {
   ];
 }
 
-def PostSparsificationRewrite : Pass<"post-sparsification-rewrite", "ModuleOp"> {
+def LowerSparseOpsToForeach : Pass<"lower-sparse-ops-to-foreach", "ModuleOp"> {
   let summary = "Applies sparse tensor rewriting rules after sparsification";
   let description = [{
-    A pass that applies rewriting rules to sparse tensor operations after
-    running the actual sparsification pass.
+    A pass that lowers high-level sparse operations to sparse_tensor.foreach.
   }];
-  let constructor = "mlir::createPostSparsificationRewritePass()";
+  let constructor = "mlir::createLowerSparseOpsToForeachPass()";
   let dependentDialects = [
     "affine::AffineDialect",
     "arith::ArithDialect",
@@ -186,13 +185,25 @@ def PostSparsificationRewrite : Pass<"post-sparsification-rewrite", "ModuleOp">
   let options = [
     Option<"enableRuntimeLibrary", "enable-runtime-library", "bool",
            "true", "Enable runtime library for manipulating sparse tensors">,
-    Option<"enableForeach", "enable-foreach", "bool",
-           "true", "Enable rewriting rules for the foreach operator">,
     Option<"enableConvert", "enable-convert", "bool",
            "true", "Enable rewriting rules for the convert operator">,
   ];
 }
 
+def LowerForeachToSCF : Pass<"lower-sparse-foreach-to-scf", "func::FuncOp"> {
+  let summary = "Lowers sparse_tensor.foreach operations to scf dialect";
+  let description = [{
+    A pass that lowers sparse_tensor.foreach operation to scf dialect. 
+  
+ }]; + let constructor = "mlir::createLowerForeachToSCFPass()"; + let dependentDialects = [ + "memref::MemRefDialect", + "scf::SCFDialect", + "sparse_tensor::SparseTensorDialect", + ]; +} + + def SparseTensorConversionPass : Pass<"sparse-tensor-conversion", "ModuleOp"> { let summary = "Convert sparse tensors and primitives to library calls"; let description = [{ diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp index 095a6ab9a508eb..c5fd19a811d6bb 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp @@ -25,7 +25,8 @@ namespace mlir { #define GEN_PASS_DEF_SPARSEREINTERPRETMAP #define GEN_PASS_DEF_PRESPARSIFICATIONREWRITE #define GEN_PASS_DEF_SPARSIFICATIONPASS -#define GEN_PASS_DEF_POSTSPARSIFICATIONREWRITE +#define GEN_PASS_DEF_LOWERSPARSEOPSTOFOREACH +#define GEN_PASS_DEF_LOWERFOREACHTOSCF #define GEN_PASS_DEF_SPARSETENSORCONVERSIONPASS #define GEN_PASS_DEF_SPARSETENSORCODEGEN #define GEN_PASS_DEF_SPARSEBUFFERREWRITE @@ -120,23 +121,34 @@ struct StageSparseOperationsPass } }; -struct PostSparsificationRewritePass - : public impl::PostSparsificationRewriteBase< - PostSparsificationRewritePass> { - PostSparsificationRewritePass() = default; - PostSparsificationRewritePass(const PostSparsificationRewritePass &pass) = +struct LowerSparseOpsToForeachPass + : public impl::LowerSparseOpsToForeachBase { + LowerSparseOpsToForeachPass() = default; + LowerSparseOpsToForeachPass(const LowerSparseOpsToForeachPass &pass) = default; - PostSparsificationRewritePass(bool enableRT, bool foreach, bool convert) { + LowerSparseOpsToForeachPass(bool enableRT, bool convert) { enableRuntimeLibrary = enableRT; - enableForeach = foreach; enableConvert = convert; } void runOnOperation() override { auto *ctx = &getContext(); RewritePatternSet patterns(ctx); - populatePostSparsificationRewriting(patterns, enableRuntimeLibrary, - enableForeach, enableConvert); + populateLowerSparseOpsToForeachPatterns(patterns, enableRuntimeLibrary, + enableConvert); + (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); + } +}; + +struct LowerForeachToSCFPass + : public impl::LowerForeachToSCFBase { + LowerForeachToSCFPass() = default; + LowerForeachToSCFPass(const LowerForeachToSCFPass &pass) = default; + + void runOnOperation() override { + auto *ctx = &getContext(); + RewritePatternSet patterns(ctx); + populateLowerForeachToSCFPatterns(patterns); (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } }; @@ -399,15 +411,17 @@ std::unique_ptr mlir::createStageSparseOperationsPass() { return std::make_unique(); } -std::unique_ptr mlir::createPostSparsificationRewritePass() { - return std::make_unique(); +std::unique_ptr mlir::createLowerSparseOpsToForeachPass() { + return std::make_unique(); } std::unique_ptr -mlir::createPostSparsificationRewritePass(bool enableRT, bool enableForeach, - bool enableConvert) { - return std::make_unique( - enableRT, enableForeach, enableConvert); +mlir::createLowerSparseOpsToForeachPass(bool enableRT, bool enableConvert) { + return std::make_unique(enableRT, enableConvert); +} + +std::unique_ptr mlir::createLowerForeachToSCFPass() { + return std::make_unique(); } std::unique_ptr mlir::createSparseTensorConversionPass() { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index 
e9bcb5dc070ade..528e70bd3b1ef5 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -1303,10 +1303,9 @@ void mlir::populatePreSparsificationRewriting(RewritePatternSet &patterns) {
       GenSemiRingReduction, GenSemiRingSelect>(patterns.getContext());
 }
 
-void mlir::populatePostSparsificationRewriting(RewritePatternSet &patterns,
-                                               bool enableRT,
-                                               bool enableForeach,
-                                               bool enableConvert) {
+void mlir::populateLowerSparseOpsToForeachPatterns(RewritePatternSet &patterns,
+                                                   bool enableRT,
+                                                   bool enableConvert) {
   patterns.add<ConcatenateRewriter, CrdTranslateRewriter,
               ReshapeRewriter<tensor::ExpandShapeOp>,
               ReshapeRewriter<tensor::CollapseShapeOp>,
@@ -1314,10 +1313,13 @@ void mlir::populatePostSparsificationRewriting(RewritePatternSet &patterns,
               Sparse2SparseReshapeRewriter<tensor::ExpandShapeOp>,
               Sparse2SparseReshapeRewriter<tensor::CollapseShapeOp>,
               SparseTensorDimOpRewriter, TensorReshapeRewriter, OutRewriter>(
      patterns.getContext());
-  if (enableForeach)
-    patterns.add<ForeachRewriter>(patterns.getContext());
+
   if (enableConvert)
    patterns.add<DirectConvertRewriter>(patterns.getContext());
   if (!enableRT)
     patterns.add<NewRewriter>(patterns.getContext());
 }
+
+void mlir::populateLowerForeachToSCFPatterns(RewritePatternSet &patterns) {
+  patterns.add<ForeachRewriter>(patterns.getContext());
+}
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
index d8a24ea3527b19..f3f3828e0c5bdf 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
@@ -141,7 +141,10 @@ class SparsificationAndBufferizationPass
     OpPassManager pm("builtin.module");
     pm.addPass(createSparsificationPass(sparsificationOptions));
     pm.addNestedPass<func::FuncOp>(createStageSparseOperationsPass());
-    pm.addPass(createPostSparsificationRewritePass(enableRuntimeLibrary));
+    pm.addPass(createLowerSparseOpsToForeachPass(enableRuntimeLibrary,
+                                                 /*enableConvert=*/true));
+    // TODO: DemapPass here! 
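+    // (Dim-to-lvl translation for non-linalg ops still needs to run between
+    // the foreach lowering above and the scf lowering below.)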
+    pm.addNestedPass<func::FuncOp>(createLowerForeachToSCFPass());
     if (vectorLength > 0) {
       pm.addPass(mlir::createLoopInvariantCodeMotionPass());
       pm.addPass(createSparseVectorizationPass(
diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir
index 8993333d6e5333..c53ec7408bc3b8 100644
--- a/mlir/test/Dialect/SparseTensor/codegen.mlir
+++ b/mlir/test/Dialect/SparseTensor/codegen.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --post-sparsification-rewrite --sparse-tensor-codegen --canonicalize -cse | FileCheck %s
+// RUN: mlir-opt %s --lower-sparse-ops-to-foreach --lower-sparse-foreach-to-scf --sparse-tensor-codegen --canonicalize -cse | FileCheck %s
 
 #SV = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>
 
diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir
index 092ba6b8358b59..27d8f296c9ad0c 100644
--- a/mlir/test/Dialect/SparseTensor/conversion.mlir
+++ b/mlir/test/Dialect/SparseTensor/conversion.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --post-sparsification-rewrite --sparse-tensor-conversion --canonicalize --cse | FileCheck %s
+// RUN: mlir-opt %s --lower-sparse-ops-to-foreach --lower-sparse-foreach-to-scf --sparse-tensor-conversion --canonicalize --cse | FileCheck %s
 
 #SparseVector = #sparse_tensor.encoding<{
   map = (d0) -> (d0 : compressed)
diff --git a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
index 4dba16df39f5c6..4f37ae9207be9c 100644
--- a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
+++ b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --stage-sparse-ops --post-sparsification-rewrite="enable-foreach=false" --canonicalize --cse | FileCheck %s
+// RUN: mlir-opt %s --stage-sparse-ops --lower-sparse-ops-to-foreach --canonicalize --cse | FileCheck %s
 
 #SparseVector = #sparse_tensor.encoding<{
   map = (d0) -> (d0 : compressed)
diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir
index e2dcb068e11851..730a5452df3944 100644
--- a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir
+++ b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --stage-sparse-ops --post-sparsification-rewrite="enable-foreach=false" --canonicalize --cse | FileCheck %s
+// RUN: mlir-opt %s --stage-sparse-ops --lower-sparse-ops-to-foreach --canonicalize --cse | FileCheck %s
 
 #SparseVector = #sparse_tensor.encoding<{
   map = (d0) -> (d0 : compressed)
diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir
index 0280e27b4e312a..896bc02212971f 100644
--- a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir
+++ b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --stage-sparse-ops --post-sparsification-rewrite="enable-foreach=false" --canonicalize --cse | FileCheck %s
+// RUN: mlir-opt %s --stage-sparse-ops --lower-sparse-ops-to-foreach --canonicalize --cse | FileCheck %s
 
 #SparseVector64 = #sparse_tensor.encoding<{
   map = (d0) -> (d0 : compressed),
diff --git a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir
index 1e72f059baec29..93e802bc6065e4 100644
--- a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir
+++ b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir
@@ -1,5 +1,5 
@@ -// RUN: mlir-opt %s -post-sparsification-rewrite="enable-runtime-library=false enable-convert=false" | \ -// RUN: FileCheck %s +// RUN: mlir-opt %s --lower-sparse-ops-to-foreach="enable-runtime-library=false enable-convert=false" \ +// RUN: --lower-sparse-foreach-to-scf | FileCheck %s #CSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir index f3d3dd28563e89..e4e2748112d78c 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s --post-sparsification-rewrite="enable-runtime-library=false enable-convert=false" \ +// RUN: mlir-opt %s --lower-sparse-ops-to-foreach="enable-runtime-library=false enable-convert=false" --lower-sparse-foreach-to-scf \ // RUN: | FileCheck %s -// RUN: mlir-opt %s --post-sparsification-rewrite="enable-runtime-library=true enable-convert=false" \ +// RUN: mlir-opt %s --lower-sparse-ops-to-foreach="enable-runtime-library=true enable-convert=false" --lower-sparse-foreach-to-scf \ // RUN: | FileCheck %s diff --git a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir index 3ee6e84a2382a9..0f367f12483f63 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir @@ -4,7 +4,8 @@ // RUN: FileCheck %s --check-prefix=CHECK-SPARSE // RUN: mlir-opt %s --linalg-generalize-named-ops \ // RUN: --linalg-fuse-elementwise-ops \ -// RUN: --sparsification --post-sparsification-rewrite \ +// RUN: --sparsification --lower-sparse-ops-to-foreach \ +// RUN: --lower-sparse-foreach-to-scf \ // RUN: --sparse-tensor-conversion --cse | \ // RUN: FileCheck %s --check-prefix=CHECK-CONVERT diff --git a/mlir/test/Dialect/SparseTensor/sparse_foreach.mlir b/mlir/test/Dialect/SparseTensor/sparse_foreach.mlir index bbce42c100641a..5983289c752efc 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_foreach.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_foreach.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s --post-sparsification-rewrite="enable-runtime-library=false enable-foreach=true" --canonicalize | FileCheck %s +// RUN: mlir-opt %s --lower-sparse-foreach-to-scf --canonicalize | FileCheck %s // CHECK-LABEL: func.func @sparse_foreach_constant // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_pack.mlir b/mlir/test/Dialect/SparseTensor/sparse_pack.mlir index 9af998be2f6829..80cfa3c635f361 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_pack.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_pack.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s --canonicalize --post-sparsification-rewrite="enable-runtime-library=false" --sparse-tensor-codegen -cse --canonicalize | FileCheck %s +// RUN: mlir-opt %s --canonicalize --sparse-tensor-codegen -cse --canonicalize | FileCheck %s #COO = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton), diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir index 4f105f3e19b3e7..d3d6d8c91fa45a 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir @@ -1,8 +1,8 @@ // RUN: mlir-opt %s | mlir-opt | FileCheck %s --check-prefix=CHECK-ROUND -// RUN: mlir-opt %s --post-sparsification-rewrite="enable-runtime-library=true 
enable-convert=false" \
+// RUN:   --lower-sparse-foreach-to-scf --cse --canonicalize | FileCheck %s
 
 #SparseVector = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>
 #SparseMatrix = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir
index a1578eb20b8ba3..339d65ce5716fa 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s --post-sparsification-rewrite="enable-runtime-library=false enable-convert=false" \
-// RUN:  --cse --canonicalize  | FileCheck %s
+// RUN: mlir-opt %s --lower-sparse-ops-to-foreach="enable-runtime-library=false enable-convert=false" \
+// RUN:   --lower-sparse-foreach-to-scf --cse --canonicalize  | FileCheck %s
 
 #SparseMatrix = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }>
 

From 83c560b3bf46e9b5a65f9a41b60e21898e286c9c Mon Sep 17 00:00:00 2001
From: Philip Reames <preames@rivosinc.com>
Date: Mon, 30 Oct 2023 15:29:57 -0700
Subject: [PATCH 092/144] [SDAG] Prefer forming sign_extend for zext nneg per
 target preference (#70725)

Builds on #67982 which recently introduced the nneg flag on a zext
instruction.

Note that this change is the first point where the flag is being used
for an optimization, and thus may expose latent miscompiles. We've
recently taught both CVP and InstCombine to infer the flag when forming
zext, but nothing else is using the flag just yet.
---
 .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 16 ++++++++++++++--
 llvm/test/CodeGen/RISCV/sext-zext-trunc.ll       |  3 +--
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0e6129aaf52192..c518b1f95e9023 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3524,8 +3524,20 @@ void SelectionDAGBuilder::visitZExt(const User &I) {
   // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
   // ZExt also can't be a cast to bool for same reason. So, nothing much to do
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
-                                                        I.getType());
+  auto &TLI = DAG.getTargetLoweringInfo();
+  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+  // Since we don't yet have a representation of zext nneg in SDAG or MI,
+  // eagerly use the information to canonicalize towards sign_extend if
+  // that is the target's preference.  TODO: Add nneg support to the
+  // SDAG and MI representations. 
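+  // (For example, on RV64 this turns the zext nneg i32 to i64 lowering from
+  // a slli+srli pair into a single sext.w; see the RISC-V test update below.)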
+  if (auto *PNI = dyn_cast<PossiblyNonNegInst>(&I);
+      PNI && PNI->hasNonNeg() &&
+      TLI.isSExtCheaperThanZExt(N.getValueType(), DestVT)) {
+    setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
+    return;
+  }
+
   setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
 }
 
diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
index 7297bfaf0c62ec..20d73acddea01b 100644
--- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
+++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
@@ -501,8 +501,7 @@ define i64 @zext_nneg_i32_to_i64(i32 %a) nounwind {
 ;
 ; RV64-LABEL: zext_nneg_i32_to_i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    sext.w a0, a0
 ; RV64-NEXT:    ret
   %1 = zext nneg i32 %a to i64
   ret i64 %1

From ef100c228a5913c6c54bfed8e80fd265ce8beca2 Mon Sep 17 00:00:00 2001
From: Peiming Liu <36770114+PeimingLiu@users.noreply.github.com>
Date: Mon, 30 Oct 2023 16:04:41 -0700
Subject: [PATCH 093/144] [mlir][sparse] implements tensor.insert on sparse
 tensors. (#70737)

---
 .../SparseTensor/IR/SparseTensorType.h        |  6 ++
 .../Transforms/SparseReinterpretMap.cpp       | 61 ++++++++++++++++++-
 .../Transforms/SparseTensorRewriting.cpp      | 57 +++--------------
 .../SparsificationAndBufferizationPass.cpp    |  4 +-
 .../SparseTensor/convert_dense2sparse.mlir    | 14 ++---
 .../SparseTensor/convert_sparse2sparse.mlir   |  6 +-
 .../Dialect/SparseTensor/sparse_concat.mlir   | 12 ++--
 7 files changed, 95 insertions(+), 65 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
index 7a1f1e2144e049..0761cbee524073 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
@@ -251,6 +251,12 @@ class SparseTensorType {
                              CrdTransDirectionKind::dim2lvl);
   }
 
+  RankedTensorType getDemappedType() const {
+    auto lvlShape = getLvlShape();
+    return RankedTensorType::get(lvlShape, rtp.getElementType(),
+                                 enc.withoutDimToLvl());
+  }
+
   /// Safely looks up the requested dimension-DynSize.  If you intend
   /// to check the result with `ShapedType::isDynamic`, then see the
   /// `getStaticDimSize` method instead. 
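A quick sketch of what the new helper yields (the types here are
illustrative, not taken from the patch):

    // dim space: tensor<3x4xf64, #CSC> with dimToLvl (d0, d1) -> (d1, d0)
    // lvl space: getDemappedType() returns tensor<4x3xf64, #enc'>, i.e. the
    // level shape with the same element type and an encoding that keeps the
    // level types but drops the dim-to-lvl permutation.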
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp
index 10722ccb6eea74..66fd2e4d94a28b 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp
@@ -6,9 +6,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"
 #include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/IR/AffineMap.h"
+
+using namespace mlir;
+using namespace mlir::sparse_tensor;
 
 namespace {
 
@@ -17,7 +23,60 @@ namespace {
 // (2) rewrite linalg.generic ops traits on level crds
 // (3) compute topsort, and resolve cyles with sparse_tensor.convert ops
 
+//===----------------------------------------------------------------------===//
+// Reinterpret Map Rewriters for operations other than linalg.generics
+//===----------------------------------------------------------------------===//
+
+struct CrdTranslateRewriter : public OpRewritePattern<CrdTranslateOp> {
+  using OpRewritePattern<CrdTranslateOp>::OpRewritePattern;
+  LogicalResult matchAndRewrite(CrdTranslateOp op,
+                                PatternRewriter &rewriter) const override {
+    AffineMap map = op.getDirection() == CrdTransDirectionKind::dim2lvl
+                        ? op.getEncoder().getDimToLvl()
+                        : op.getEncoder().getLvlToDim();
+    SmallVector<Value> outCrds;
+    for (AffineExpr result : map.getResults()) {
+      // TODO: we should probably expand the affine map to IR using our own
+      // rules, since affine.apply assumes signed values, while the
+      // coordinates we provide must always be signless.
+      Value trans = rewriter.create<affine::AffineApplyOp>(
+          op.getLoc(), AffineMap::get(map.getNumDims(), 0, result),
+          op.getInCrds());
+      outCrds.push_back(trans);
+    }
+    rewriter.replaceOp(op, outCrds);
+    return success();
+  }
+};
+
+struct TensorInsertRewriter : public OpRewritePattern<tensor::InsertOp> {
+  using OpRewritePattern<tensor::InsertOp>::OpRewritePattern;
+  LogicalResult matchAndRewrite(tensor::InsertOp op,
+                                PatternRewriter &rewriter) const override {
+
+    if (!op.getResult().getType().getEncoding())
+      return failure();
+    Location loc = op.getLoc();
+    auto stt = getSparseTensorType(op.getResult());
+    ValueRange lvlCrd = stt.translateCrds(rewriter, loc, op.getIndices(),
+                                          CrdTransDirectionKind::dim2lvl);
+
+    Value t = rewriter.create<ReinterpretMapOp>(
+        loc, stt.getEncoding().withoutDimToLvl(), op.getDest());
+    t = rewriter.create<sparse_tensor::InsertOp>(loc, op.getScalar(), t,
+                                                 lvlCrd);
+    rewriter.replaceOpWithNewOp<ReinterpretMapOp>(op, op.getType(), t);
+    return success();
+  }
+};
+
 } // namespace
 
 void mlir::populateSparseReinterpretMap(RewritePatternSet &patterns,
-                                        ReinterpretMapScope scope) {}
+                                        ReinterpretMapScope scope) {
+  if (scope == ReinterpretMapScope::kAll ||
+      scope == ReinterpretMapScope::kExceptGeneric) {
+    patterns.add<CrdTranslateRewriter, TensorInsertRewriter>(
+        patterns.getContext());
+  }
+}
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index 528e70bd3b1ef5..02796bc9a7e7df 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -846,11 +846,7 @@ struct TensorLike {
   }
 
   void insert(OpBuilder &builder, Location loc, Value v, ValueRange crds) {
-    // TODO: Unify these two. 
-    if (isSparse())
-      val = builder.create<sparse_tensor::InsertOp>(loc, v, val, crds);
-    else
-      val = builder.create<tensor::InsertOp>(loc, v, val, crds);
+    val = builder.create<tensor::InsertOp>(loc, v, val, crds);
   }
 
   Value finalize(OpBuilder &builder, Location loc, RankedTensorType rtp) const {
@@ -866,28 +862,6 @@ struct TensorLike {
   Value val;
 };
 
-struct CrdTranslateRewriter : public OpRewritePattern<CrdTranslateOp> {
-  using OpRewritePattern<CrdTranslateOp>::OpRewritePattern;
-  LogicalResult matchAndRewrite(CrdTranslateOp op,
-                                PatternRewriter &rewriter) const override {
-    AffineMap map = op.getDirection() == CrdTransDirectionKind::dim2lvl
-                        ? op.getEncoder().getDimToLvl()
-                        : op.getEncoder().getLvlToDim();
-    SmallVector<Value> outCrds;
-    for (AffineExpr result : map.getResults()) {
-      // TODO: we should probably expand the affine map to IR using our own
-      // rules, since affine.apply assume signed value, while the cooridinates
-      // we provided must always be signless.
-      Value trans = rewriter.create<affine::AffineApplyOp>(
-          op.getLoc(), AffineMap::get(map.getNumDims(), 0, result),
-          op.getInCrds());
-      outCrds.push_back(trans);
-    }
-    rewriter.replaceOp(op, outCrds);
-    return success();
-  }
-};
-
 struct SparseTensorDimOpRewriter : public OpRewritePattern<tensor::DimOp> {
   using OpRewritePattern<tensor::DimOp>::OpRewritePattern;
   LogicalResult matchAndRewrite(tensor::DimOp op,
@@ -939,7 +913,6 @@ struct ConcatenateRewriter : public OpRewritePattern<ConcatenateOp> {
     const Location loc = op.getLoc();
     const auto dstTp = getSparseTensorType(op);
-    const Dimension dimRank = dstTp.getDimRank();
     const Dimension conDim = op.getDimension();
     SmallVector<Value> sizes;
     concatSizesFromInputs(rewriter, sizes, loc, dstTp, op.getInputs(), conDim);
@@ -969,15 +942,10 @@ struct ConcatenateRewriter : public OpRewritePattern<ConcatenateOp> {
           loc, input, iterArg,
           [&](OpBuilder &builder, Location loc, ValueRange dcvs, Value v,
              ValueRange reduc) {
-            SmallVector<Value> dstLcvs(dstTp.getLvlRank());
-            for (Dimension d = 0; d < dimRank; d++) {
-              Value crd = dcvs[d];
-              // Transforms coordinates for the concatenating dim.
-              if (d == conDim)
-                crd = builder.create<arith::AddIOp>(loc, crd, offset);
-              // FIXME: `toStoredDim` is deprecated
-              dstLcvs[toStoredDim(dstTp.getEncoding(), d)] = crd;
-            }
+            SmallVector<Value> offDimCrd(dcvs);
+            offDimCrd[conDim] =
+                builder.create<arith::AddIOp>(loc, offDimCrd[conDim], offset);
+
             // Enters foreach, updates the SSA chain.
            dstBuf.val = reduc.front();
             if (!dstTp.isAllDense()) {
@@ -988,14 +956,14 @@ struct ConcatenateRewriter : public OpRewritePattern<ConcatenateOp> {
               builder.create<sparse_tensor::YieldOp>(loc, dstBuf.val);
 
               builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-              dstBuf.insert(builder, loc, v, dstLcvs);
+              dstBuf.insert(builder, loc, v, offDimCrd);
               builder.create<sparse_tensor::YieldOp>(loc, dstBuf.val);
 
               // Exits the ifOp, update the sparse tensor SSA value.
               builder.setInsertionPointAfter(ifOp);
               dstBuf.val = ifOp.getResult(0);
             } else {
-              dstBuf.insert(builder, loc, v, dstLcvs);
+              dstBuf.insert(builder, loc, v, offDimCrd);
             }
             builder.create<sparse_tensor::YieldOp>(loc, dstBuf.val);
           });
@@ -1064,10 +1032,6 @@ struct DirectConvertRewriter : public OpRewritePattern<ConvertOp> {
                    ValueRange reduc) {
           // Enters the loop, update the SSA value for insertion chain. 
dstBuf.val = reduc.front();
-
-          ValueRange lcvs = dstStt.translateCrds(
-              builder, loc, dcvs, CrdTransDirectionKind::dim2lvl);
-
           if (!skipZeroCheck) {
             Value cond = genIsNonzero(builder, loc, v);
             auto ifOp = builder.create<scf::IfOp>(loc, reduc.getTypes(), cond,
@@ -1076,14 +1040,14 @@ struct DirectConvertRewriter : public OpRewritePattern<ConvertOp> {
             builder.create<sparse_tensor::YieldOp>(loc, dstBuf.val);
 
             builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-            dstBuf.insert(builder, loc, v, lcvs);
+            dstBuf.insert(builder, loc, v, dcvs);
             builder.create<sparse_tensor::YieldOp>(loc, dstBuf.val);
 
             // Exits the ifOp, update the sparse tensor SSA value.
             builder.setInsertionPointAfter(ifOp);
             dstBuf.val = ifOp.getResult(0);
           } else {
-            dstBuf.insert(builder, loc, v, lcvs);
+            dstBuf.insert(builder, loc, v, dcvs);
           }
           builder.create<sparse_tensor::YieldOp>(loc, dstBuf.val);
         });
@@ -1306,8 +1270,7 @@ void mlir::populatePreSparsificationRewriting(RewritePatternSet &patterns) {
 void mlir::populateLowerSparseOpsToForeachPatterns(RewritePatternSet &patterns,
                                                    bool enableRT,
                                                    bool enableConvert) {
-  patterns.add<ConcatenateRewriter, CrdTranslateRewriter,
+  patterns.add<ConcatenateRewriter,
               ReshapeRewriter<tensor::ExpandShapeOp>,
               ReshapeRewriter<tensor::CollapseShapeOp>,
               Sparse2SparseReshapeRewriter<tensor::ExpandShapeOp>,
               Sparse2SparseReshapeRewriter<tensor::CollapseShapeOp>,
               SparseTensorDimOpRewriter, TensorReshapeRewriter, OutRewriter>(
      patterns.getContext());
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
index f3f3828e0c5bdf..41940f731e76c1 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
@@ -143,7 +143,9 @@ class SparsificationAndBufferizationPass
     pm.addNestedPass<func::FuncOp>(createStageSparseOperationsPass());
     pm.addPass(createLowerSparseOpsToForeachPass(enableRuntimeLibrary,
                                                  /*enableConvert=*/true));
-    // TODO: DemapPass here!
+    // Handle dim-to-lvl maps on operations other than linalg.generic. 
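+    // (e.g. the crd_translate and reinterpret_map ops emitted by the new
+    // tensor.insert lowering above are resolved by this pass.)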
+    pm.addPass(
+        createSparseReinterpretMapPass(ReinterpretMapScope::kExceptGeneric));
     pm.addNestedPass<func::FuncOp>(createLowerForeachToSCFPass());
     if (vectorLength > 0) {
       pm.addPass(mlir::createLoopInvariantCodeMotionPass());
       pm.addPass(createSparseVectorizationPass(
diff --git a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
index 4f37ae9207be9c..96a1140372bd6c 100644
--- a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
+++ b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
@@ -19,7 +19,7 @@
 // CHECK-LABEL: func.func @sparse_convert_1d
 // CHECK: sparse_tensor.foreach
 // CHECK: scf.if
-// CHECK: sparse_tensor.insert
+// CHECK: tensor.insert
 // CHECK-NOT: sparse_tensor.reorder_coo
 // CHECK: sparse_tensor.load
 func.func @sparse_convert_1d(%arg0: tensor<?xi32>) -> tensor<?xi32, #SparseVector> {
@@ -30,7 +30,7 @@ func.func @sparse_convert_1d(%arg0: tensor<?xi32>) -> tensor<?xi32, #SparseVector>
 // CHECK-LABEL: func.func @sparse_convert_complex
 // CHECK: sparse_tensor.foreach
 // CHECK: scf.if
-// CHECK: sparse_tensor.insert
+// CHECK: tensor.insert
 // CHECK-NOT: sparse_tensor.reorder_coo
 // CHECK: sparse_tensor.load
 func.func @sparse_convert_complex(%arg0: tensor<100xcomplex<f64>>) -> tensor<100xcomplex<f64>, #SparseVector> {
@@ -41,7 +41,7 @@ func.func @sparse_convert_complex(%arg0: tensor<100xcomplex<f64>>) -> tensor<100xcomplex<f64>, #SparseVector> {
 // CHECK-LABEL: func.func @sparse_convert_2d
 // CHECK: sparse_tensor.foreach
 // CHECK: scf.if
-// CHECK: sparse_tensor.insert
+// CHECK: tensor.insert
 // CHECK-NOT: sparse_tensor.reorder_coo
 // CHECK: sparse_tensor.load
 func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> {
@@ -52,7 +52,7 @@ func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> {
 // CHECK-LABEL: func.func @sparse_constant
 // CHECK: sparse_tensor.foreach
 // CHECK-NOT: scf.if
-// CHECK: sparse_tensor.insert
+// CHECK: tensor.insert
 // CHECK-NOT: sparse_tensor.reorder_coo
 // CHECK: sparse_tensor.load
 func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{
@@ -66,7 +66,7 @@ func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{
 // CHECK-LABEL: func.func @sparse_constant_csc
 // CHECK: sparse_tensor.foreach
 // CHECK-NOT: scf.if
-// CHECK: sparse_tensor.insert
+// CHECK: tensor.insert
 // CHECK-NOT: sparse_tensor.reorder_coo
 // CHECK: sparse_tensor.load
 func.func @sparse_constant_csc() -> tensor<8x7xf32, #CSC>{
@@ -80,11 +80,11 @@ func.func @sparse_constant_csc() -> tensor<8x7xf32, #CSC>{
 // CHECK-LABEL: func.func @sparse_convert_3d
 // CHECK: sparse_tensor.foreach
 // CHECK: scf.if
-// CHECK: sparse_tensor.insert
+// CHECK: tensor.insert
 // CHECK: sparse_tensor.load
 // CHECK: sparse_tensor.reorder_coo
 // CHECK: sparse_tensor.foreach
-// CHECK: sparse_tensor.insert
+// CHECK: tensor.insert
 // CHECK: sparse_tensor.load
 func.func @sparse_convert_3d(%arg0: tensor<?x?x?xf64>) -> tensor<?x?x?xf64, #SparseTensor> {
   %0 = sparse_tensor.convert %arg0 : tensor<?x?x?xf64> to tensor<?x?x?xf64, #SparseTensor>
diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir
index 896bc02212971f..0673f915a1cf62 100644
--- a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir
+++ b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir
@@ -66,11 +66,11 @@ func.func @sparse_convert(%arg0: tensor<?xf32, #SparseVector64>) -> tensor<?xf32, #SparseVector32>
 // CHECK-LABEL: func.func @sparse_convert_permuted
 // CHECK: sparse_tensor.foreach
-// CHECK: sparse_tensor.insert
+// CHECK: tensor.insert
 // CHECK: sparse_tensor.load
 // CHECK: sparse_tensor.reorder_coo
 // CHECK: sparse_tensor.foreach
-// CHECK: sparse_tensor.insert
+// CHECK: tensor.insert
 // CHECK: sparse_tensor.load
 func.func @sparse_convert_permuted(%arg0: tensor<?x?x?xf32, #SortedCOO3D>) -> tensor<?x?x?xf32, #TsssPermuted> {
@@ -80,7 +80,7 @@ func.func @sparse_convert_permuted(%arg0: tensor<?x?x?xf32, #SortedCOO3D>) -> tensor<?x?x?xf32, #TsssPermuted>
 // CHECK-LABEL: func.func @sparse_convert_slice
 // CHECK: sparse_tensor.foreach
-// CHECK: sparse_tensor.insert
+// CHECK: tensor.insert
 // CHECK: sparse_tensor.load
 // CHECK-NOT: sparse_tensor.reorder_coo
 // CHECK: return
diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir
index f3d3dd28563e89..86dc9a11750713 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir
@@ -30,7 +30,7 @@
// CHECK: 
%[[RET_4:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A1:.*]] = %[[A0]])
// CHECK:           %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK:           %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK:           %[[NEW_1:.*]] = tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
// CHECK:           scf.yield %[[NEW_1]]
// CHECK:         }
// CHECK:         scf.yield %[[RET_4]]
@@ -51,7 +51,7 @@
// CHECK:           %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK:           %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK:           %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index
// CHECK:           %[[NEW_2:.*]] = tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
// CHECK:           scf.yield %[[NEW_2]]
// CHECK:         }
// CHECK:         scf.yield %[[RET_5]]
@@ -72,7 +72,7 @@
// CHECK:           %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK:           %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK:           %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index
// CHECK:           %[[NEW_3:.*]] = tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
// CHECK:           scf.yield %[[NEW_3]]
// CHECK:         }
// CHECK:         scf.yield %[[RET_6]]
@@ -116,7 +116,7 @@ func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>,
// CHECK:         %[[RET_4:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A1:.*]] = %[[A0]])
// CHECK:           %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK:           %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK:           %[[NEW_1:.*]] = tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], %[[TMP_27]]] : tensor,
// CHECK:           %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK:           %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK:           %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index
// CHECK:           %[[NEW_2:.*]] = tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor,
// CHECK:           %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK:           %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK:           %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index
// CHECK:           %[[NEW_3:.*]] = tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor

From: serge-sans-paille <sguelton@mozilla.com>
Date: Fri, 27 Oct 2023 22:48:08 +0200
Subject: [PATCH 094/144] [clang] Change GetCharAndSizeSlow interface to
 by-value style

Instead of passing the Size by reference, assuming it is initialized,
return it alongside the expected char result as a POD. This makes the
interface less error-prone: the previous interface expected the Size
reference to be initialized, and it was often forgotten, leading to
uninitialized variable usage. This patch fixes the issue.

This also generates faster code, as the returned POD (a char and an
unsigned) fits in 64 bits.

The speedup according to the compile-time tracker reaches -0.7%, with a
good number of -0.4% improvements. Details are available on

https://llvm-compile-time-tracker.com/compare.php?from=3fe63f81fcb999681daa11b2890c82fda3aaeef5&to=fc76a9202f737472ecad4d6e0b0bf87a013866f3&stat=instructions:u

And icing on the cake, on my setup it also shaves 2kB out of
libclang-cpp :-)

This is a recommit of d8f5a18b6e587aeaa8b99707e87b652f49b160cd for
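A sketch of the call-site change (illustrative, distilled from the diff
below):

  // Before: out-parameter; Size had to be pre-initialized by the caller.
  unsigned Size = 0;
  char C = Lexer::getCharAndSizeNoWarn(Ptr, Size, LangOpts);
  Ptr += Size;

  // After: a SizedChar POD returned by value; nothing to pre-initialize.
  auto [C, Size] = Lexer::getCharAndSizeNoWarn(Ptr, LangOpts);
  Ptr += Size;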
Details are available on https://llvm-compile-time-tracker.com/compare.php?from=3fe63f81fcb999681daa11b2890c82fda3aaeef5&to=fc76a9202f737472ecad4d6e0b0bf87a013866f3&stat=instructions:u And icing on the cake, on my setup it also shaves 2kB out of libclang-cpp :-) This is a recommit of d8f5a18b6e587aeaa8b99707e87b652f49b160cd for --- clang-tools-extra/pseudo/lib/Lex.cpp | 6 +- clang/include/clang/Lex/Lexer.h | 35 ++++----- clang/lib/Lex/DependencyDirectivesScanner.cpp | 5 +- clang/lib/Lex/Lexer.cpp | 73 ++++++++++--------- 4 files changed, 63 insertions(+), 56 deletions(-) diff --git a/clang-tools-extra/pseudo/lib/Lex.cpp b/clang-tools-extra/pseudo/lib/Lex.cpp index 4b89ad017ef1f8..2111476f04dc5b 100644 --- a/clang-tools-extra/pseudo/lib/Lex.cpp +++ b/clang-tools-extra/pseudo/lib/Lex.cpp @@ -87,9 +87,9 @@ TokenStream cook(const TokenStream &Code, const LangOptions &LangOpts) { llvm::SmallString<64> CleanBuffer; const char *Pos = Tok.text().begin(); while (Pos < Tok.text().end()) { - unsigned CharSize = 0; - CleanBuffer.push_back( - clang::Lexer::getCharAndSizeNoWarn(Pos, CharSize, LangOpts)); + auto [Char, CharSize] = + clang::Lexer::getCharAndSizeNoWarn(Pos, LangOpts); + CleanBuffer.push_back(Char); assert(CharSize != 0 && "no progress!"); Pos += CharSize; } diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index ac0ef14c591bdd..899e665e745465 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -575,19 +575,23 @@ class Lexer : public PreprocessorLexer { /// sequence. static bool isNewLineEscaped(const char *BufferStart, const char *Str); + /// Represents a char and the number of bytes parsed to produce it. + struct SizedChar { + char Char; + unsigned Size; + }; + /// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever /// emit a warning. - static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, - const LangOptions &LangOpts) { + static inline SizedChar getCharAndSizeNoWarn(const char *Ptr, + const LangOptions &LangOpts) { // If this is not a trigraph and not a UCN or escaped newline, return // quickly. if (isObviouslySimpleCharacter(Ptr[0])) { - Size = 1; - return *Ptr; + return {*Ptr, 1u}; } - Size = 0; - return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts); + return getCharAndSizeSlowNoWarn(Ptr, LangOpts); } /// Returns the leading whitespace for line that corresponds to the given @@ -665,8 +669,7 @@ class Lexer : public PreprocessorLexer { // quickly. if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++; - unsigned Size = 0; - char C = getCharAndSizeSlow(Ptr, Size, &Tok); + auto [C, Size] = getCharAndSizeSlow(Ptr, &Tok); Ptr += Size; return C; } @@ -682,9 +685,7 @@ class Lexer : public PreprocessorLexer { // Otherwise, re-lex the character with a current token, allowing // diagnostics to be emitted and flags to be set. - Size = 0; - getCharAndSizeSlow(Ptr, Size, &Tok); - return Ptr+Size; + return Ptr + getCharAndSizeSlow(Ptr, &Tok).Size; } /// getCharAndSize - Peek a single 'character' from the specified buffer, @@ -699,14 +700,14 @@ class Lexer : public PreprocessorLexer { return *Ptr; } - Size = 0; - return getCharAndSizeSlow(Ptr, Size); + auto CharAndSize = getCharAndSizeSlow(Ptr); + Size = CharAndSize.Size; + return CharAndSize.Char; } /// getCharAndSizeSlow - Handle the slow/uncommon case of the getCharAndSize /// method. 
- char getCharAndSizeSlow(const char *Ptr, unsigned &Size, - Token *Tok = nullptr); + SizedChar getCharAndSizeSlow(const char *Ptr, Token *Tok = nullptr); /// getEscapedNewLineSize - Return the size of the specified escaped newline, /// or 0 if it is not an escaped newline. P[-1] is known to be a "\" on entry @@ -720,8 +721,8 @@ class Lexer : public PreprocessorLexer { /// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a /// diagnostic. - static char getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, - const LangOptions &LangOpts); + static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr, + const LangOptions &LangOpts); //===--------------------------------------------------------------------===// // Other lexer functions. diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index 2bd2c5f8388c0d..980f865cf24c97 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -565,9 +565,8 @@ Scanner::cleanStringIfNeeded(const dependency_directives_scan::Token &Tok) { const char *BufPtr = Input.begin() + Tok.Offset; const char *AfterIdent = Input.begin() + Tok.getEnd(); while (BufPtr < AfterIdent) { - unsigned Size; - Spelling[SpellingLength++] = - Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); + auto [Char, Size] = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts); + Spelling[SpellingLength++] = Char; BufPtr += Size; } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 675ec28e514797..1c53997527732a 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -287,9 +287,9 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, if (tok::isStringLiteral(Tok.getKind())) { // Munch the encoding-prefix and opening double-quote. while (BufPtr < BufEnd) { - unsigned Size; - Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); - BufPtr += Size; + auto CharAndSize = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts); + Spelling[Length++] = CharAndSize.Char; + BufPtr += CharAndSize.Size; if (Spelling[Length - 1] == '"') break; @@ -316,9 +316,9 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, } while (BufPtr < BufEnd) { - unsigned Size; - Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); - BufPtr += Size; + auto CharAndSize = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts); + Spelling[Length++] = CharAndSize.Char; + BufPtr += CharAndSize.Size; } assert(Length < Tok.getLength() && @@ -772,10 +772,9 @@ unsigned Lexer::getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, // If we have a character that may be a trigraph or escaped newline, use a // lexer to parse it correctly. for (; CharNo; --CharNo) { - unsigned Size; - Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts); - TokPtr += Size; - PhysOffset += Size; + auto CharAndSize = Lexer::getCharAndSizeNoWarn(TokPtr, LangOpts); + TokPtr += CharAndSize.Size; + PhysOffset += CharAndSize.Size; } // Final detail: if we end up on an escaped newline, we want to return the @@ -1357,15 +1356,16 @@ SourceLocation Lexer::findLocationAfterToken( /// /// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should /// be updated to match. -char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, - Token *Tok) { +Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) { + unsigned Size = 0; // If we have a slash, look for an escaped newline. 
if (Ptr[0] == '\\') { ++Size; ++Ptr; Slash: // Common case, backslash-char where the char is not whitespace. - if (!isWhitespace(Ptr[0])) return '\\'; + if (!isWhitespace(Ptr[0])) + return {'\\', Size}; // See if we have optional whitespace characters between the slash and // newline. @@ -1382,11 +1382,13 @@ char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, Ptr += EscapedNewLineSize; // Use slow version to accumulate a correct size field. - return getCharAndSizeSlow(Ptr, Size, Tok); + auto CharAndSize = getCharAndSizeSlow(Ptr, Tok); + CharAndSize.Size += Size; + return CharAndSize; } // Otherwise, this is not an escaped newline, just return the slash. - return '\\'; + return {'\\', Size}; } // If this is a trigraph, process it. @@ -1401,13 +1403,12 @@ char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, Ptr += 3; Size += 3; if (C == '\\') goto Slash; - return C; + return {C, Size}; } } // If this is neither, return a single character. - ++Size; - return *Ptr; + return {*Ptr, Size + 1u}; } /// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the @@ -1416,15 +1417,18 @@ char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, /// /// NOTE: When this method is updated, getCharAndSizeSlow (above) should /// be updated to match. -char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, - const LangOptions &LangOpts) { +Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, + const LangOptions &LangOpts) { + + unsigned Size = 0; // If we have a slash, look for an escaped newline. if (Ptr[0] == '\\') { ++Size; ++Ptr; Slash: // Common case, backslash-char where the char is not whitespace. - if (!isWhitespace(Ptr[0])) return '\\'; + if (!isWhitespace(Ptr[0])) + return {'\\', Size}; // See if we have optional whitespace characters followed by a newline. if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) { @@ -1433,11 +1437,13 @@ char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, Ptr += EscapedNewLineSize; // Use slow version to accumulate a correct size field. - return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts); + auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts); + CharAndSize.Size += Size; + return CharAndSize; } // Otherwise, this is not an escaped newline, just return the slash. - return '\\'; + return {'\\', Size}; } // If this is a trigraph, process it. @@ -1448,13 +1454,12 @@ char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, Ptr += 3; Size += 3; if (C == '\\') goto Slash; - return C; + return {C, Size}; } } // If this is neither, return a single character. - ++Size; - return *Ptr; + return {*Ptr, Size + 1u}; } //===----------------------------------------------------------------------===// @@ -1964,11 +1969,14 @@ bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { /// isHexaLiteral - Return true if Start points to a hex constant. /// in microsoft mode (where this is supposed to be several different tokens). 
bool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) {
-  unsigned Size;
-  char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, LangOpts);
+  auto CharAndSize1 = Lexer::getCharAndSizeNoWarn(Start, LangOpts);
+  char C1 = CharAndSize1.Char;
   if (C1 != '0')
     return false;
-  char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, LangOpts);
+
+  auto CharAndSize2 =
+      Lexer::getCharAndSizeNoWarn(Start + CharAndSize1.Size, LangOpts);
+  char C2 = CharAndSize2.Char;
   return (C2 == 'x' || C2 == 'X');
 }

@@ -2012,8 +2020,7 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {

     // If we have a digit separator, continue.
     if (C == '\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
-      unsigned NextSize;
-      char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, LangOpts);
+      auto [Next, NextSize] = getCharAndSizeNoWarn(CurPtr + Size, LangOpts);
       if (isAsciiIdentifierContinue(Next)) {
         if (!isLexingRawMode())
           Diag(CurPtr, LangOpts.CPlusPlus
@@ -2085,8 +2092,8 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
   unsigned Consumed = Size;
   unsigned Chars = 1;
   while (true) {
-    unsigned NextSize;
-    char Next = getCharAndSizeNoWarn(CurPtr + Consumed, NextSize, LangOpts);
+    auto [Next, NextSize] =
+        getCharAndSizeNoWarn(CurPtr + Consumed, LangOpts);
     if (!isAsciiIdentifierContinue(Next)) {
       // End of suffix. Check whether this is on the allowed list.
       const StringRef CompleteSuffix(Buffer, Chars);

From 6cc363ed7c9b56a2f3d26d0a88d72b32c1ce9c34 Mon Sep 17 00:00:00 2001
From: Artem Belevich
Date: Mon, 30 Oct 2023 16:12:40 -0700
Subject: [PATCH 095/144] [CUDA, NVPTX] accept/ignore any -mcmodel arguments.
 (#70740)

Code model has no impact on NVPTX as we do not produce any object files,
but we need to avoid erroring out on the -mcmodel argument passed to the
top-level compilation and propagated to all sub-compilations.
---
 clang/lib/Driver/ToolChains/Clang.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 43a92adbef64ba..fb90fcd033b1ac 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5743,6 +5743,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   } else if (Triple.getArch() == llvm::Triple::x86_64) {
     Ok = llvm::is_contained({"small", "kernel", "medium", "large", "tiny"},
                             CM);
+  } else if (Triple.isNVPTX()) {
+    // NVPTX does not care about the code model and will accept whatever works
+    // for the host.
+    Ok = true;
   }
   if (Ok) {
     CmdArgs.push_back(Args.MakeArgString("-mcmodel=" + CM));

From 8a786be384fb1279413bd123ce2dfeee9c008bc4 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani
Date: Mon, 30 Oct 2023 16:29:46 -0700
Subject: [PATCH 096/144] [lldb] Fix misleading indentation warning in
 ScriptInterpreterPython (NFC) (#70732)

This should silence the "misleading indentation" warnings introduced by
b2929be, by adding a no-op if-statement when the surrounding
if-statement has been compiled out.
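In reduced form, the pattern applied in the diff below is roughly this
(hypothetical macro and function names, for illustration only):

    #if LEGACY_PYTHON_CHECK
      if (LegacyThreadsInitialized()) {
    #else
      if (true) { // no-op guard keeps the braces and indentation balanced
    #endif
        DoInitialization();
      }

Whichever branch the preprocessor keeps, the body remains inside a braced
if-statement, so the -Wmisleading-indentation diagnostic no longer fires.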
Signed-off-by: Med Ismail Bennani
---
 .../ScriptInterpreter/Python/ScriptInterpreterPython.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
index 968cc8ca03001e..953f8b3aba18f7 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
@@ -183,6 +183,8 @@ struct InitializePythonRAII {
   // Python 3.13. It has been returning `true` always since Python 3.7.
 #if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 9) || (PY_MAJOR_VERSION < 3)
   if (PyEval_ThreadsInitialized()) {
+#else
+  if (true) {
 #endif
     Log *log = GetLog(LLDBLog::Script);

@@ -199,6 +201,8 @@ struct InitializePythonRAII {

     // InitThreads acquires the GIL if it hasn't been called before.
     PyEval_InitThreads();
+#else
+  }
 #endif
   }

From 784a2cd561acf3cf532cf182221be0826b0ec6a1 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Mon, 30 Oct 2023 16:35:30 -0700
Subject: [PATCH 097/144] [RISCV] Rewrite RISCVCodeGenPrepare using zext nneg
 [nfc-ish] (#70739)

This stacks on #70725. Once we have lowering for zext nneg, we can
rewrite all of the existing RISCVCodeGenPrepare logic in terms of zext
nneg instead of sext. The change isn't NFC from the perspective of the
individual pass, but should be from the perspective of codegen as a
whole.

As noted in the TODO, one piece can be moved to instcombine, but I'll
leave that to a separate commit.
---
 llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 50 ++++++++-----------
 .../CodeGen/RISCV/riscv-codegenprepare.ll     |  4 +-
 2 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 2fcd9a40588a73..7bc7e3924ca702 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -62,38 +62,32 @@ bool RISCVCodeGenPrepare::visitZExtInst(ZExtInst &ZExt) {
   if (!ST->is64Bit())
     return false;

+  if (ZExt.hasNonNeg())
+    return false;
+
   Value *Src = ZExt.getOperand(0);

   // We only care about ZExt from i32 to i64.
   if (!ZExt.getType()->isIntegerTy(64) || !Src->getType()->isIntegerTy(32))
     return false;

-  // Look for an opportunity to replace (i64 (zext (i32 X))) with a sext if we
-  // can determine that the sign bit of X is zero via a dominating condition.
-  // This often occurs with widened induction variables.
+  // Look for an opportunity to infer nneg on a zext if we can determine that
+  // the sign bit of X is zero via a dominating condition. This often occurs
+  // with widened induction variables.
   if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
                               Constant::getNullValue(Src->getType()), &ZExt,
                               *DL).value_or(false)) {
-    auto *SExt = new SExtInst(Src, ZExt.getType(), "", &ZExt);
-    SExt->takeName(&ZExt);
-    SExt->setDebugLoc(ZExt.getDebugLoc());
-
-    ZExt.replaceAllUsesWith(SExt);
-    ZExt.eraseFromParent();
+    ZExt.setNonNeg(true);
     ++NumZExtToSExt;
     return true;
   }

-  // Convert (zext (abs(i32 X, i1 1))) -> (sext (abs(i32 X, i1 1))). If abs of
+  // Convert (zext (abs(i32 X, i1 1))) -> (zext nneg (abs(i32 X, i1 1))). If abs of
   // INT_MIN is poison, the sign bit is zero.
+  // TODO: Move this to instcombine now that we have zext nneg in IR.
using namespace PatternMatch; if (match(Src, m_Intrinsic(m_Value(), m_One()))) { - auto *SExt = new SExtInst(Src, ZExt.getType(), "", &ZExt); - SExt->takeName(&ZExt); - SExt->setDebugLoc(ZExt.getDebugLoc()); - - ZExt.replaceAllUsesWith(SExt); - ZExt.eraseFromParent(); + ZExt.setNonNeg(true); ++NumZExtToSExt; return true; } @@ -102,9 +96,8 @@ bool RISCVCodeGenPrepare::visitZExtInst(ZExtInst &ZExt) { } // Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set, -// but bits 63:32 are zero. If we can prove that bit 31 of X is 0, we can fill -// the upper 32 bits with ones. A separate transform will turn (zext X) into -// (sext X) for the same condition. +// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill +// the upper 32 bits with ones. bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) { if (!ST->is64Bit()) return false; @@ -112,9 +105,17 @@ bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) { if (!BO.getType()->isIntegerTy(64)) return false; - // Left hand side should be sext or zext. + auto canBeSignExtend = [](Instruction *I) { + if (isa(I)) + return true; + if (isa(I)) + return I->hasNonNeg(); + return false; + }; + + // Left hand side should be a sext or zext nneg. Instruction *LHS = dyn_cast(BO.getOperand(0)); - if (!LHS || (!isa(LHS) && !isa(LHS))) + if (!LHS || !canBeSignExtend(LHS)) return false; Value *LHSSrc = LHS->getOperand(0); @@ -135,13 +136,6 @@ bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) { if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C))) return false; - // If we can determine the sign bit of the input is 0, we can replace the - // And mask constant. - if (!isImpliedByDomCondition(ICmpInst::ICMP_SGE, LHSSrc, - Constant::getNullValue(LHSSrc->getType()), - LHS, *DL).value_or(false)) - return false; - // Sign extend the constant and replace the And operand. 
C = SignExtend64<32>(C); BO.setOperand(1, ConstantInt::get(LHS->getType(), C)); diff --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll index b7530c80c417a1..b4f0918635650b 100644 --- a/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll +++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll @@ -9,7 +9,7 @@ define void @test1(ptr nocapture noundef %a, i32 noundef signext %n) { ; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -60,7 +60,7 @@ define void @test2(ptr nocapture noundef %a, i32 noundef signext %n) { ; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64 ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 1 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[N]], 1 ; CHECK-NEXT: br i1 [[TMP0]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] From c2f642d90d33a4e6c987b52e22eca4221c86c601 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 30 Oct 2023 16:41:08 -0700 Subject: [PATCH 098/144] [flang] Derived type structural equivalence (#69376) F'202X 7.5.2.4 describes conditions under which two derived type definitions are to be considered equivalent. These rules are already implemented in Evaluate/type.cpp but not exposed for general use; rearrange the code a little so that the compatibility checking of separate module procedure interfaces and explicit definitions can use it to avoid emitting a bogus error message. Fixes https://github.com/llvm/llvm-project/issues/67946. --- flang/include/flang/Evaluate/type.h | 4 + flang/lib/Evaluate/type.cpp | 15 +++- flang/lib/Semantics/check-declarations.cpp | 5 +- flang/test/Semantics/separate-mp02.f90 | 10 +-- flang/test/Semantics/separate-mp03.f90 | 2 +- flang/test/Semantics/separate-mp06.f90 | 98 ++++++++++++++++++++++ 6 files changed, 124 insertions(+), 10 deletions(-) create mode 100644 flang/test/Semantics/separate-mp06.f90 diff --git a/flang/include/flang/Evaluate/type.h b/flang/include/flang/Evaluate/type.h index ff784ef51f9024..99916aaf39978f 100644 --- a/flang/include/flang/Evaluate/type.h +++ b/flang/include/flang/Evaluate/type.h @@ -208,6 +208,10 @@ class DynamicType { // SAME_TYPE_AS (16.9.165); ignores type parameter values std::optional SameTypeAs(const DynamicType &) const; + // 7.5.2.4 type equivalence; like operator==(), but SEQUENCE/BIND(C) + // derived types can be structurally equivalent. + bool IsEquivalentTo(const DynamicType &) const; + // Result will be missing when a symbol is absent or // has an erroneous type, e.g., REAL(KIND=666). 
static std::optional From(const semantics::DeclTypeSpec &); diff --git a/flang/lib/Evaluate/type.cpp b/flang/lib/Evaluate/type.cpp index dbadc07fdbbc59..82b8f28f961dd5 100644 --- a/flang/lib/Evaluate/type.cpp +++ b/flang/lib/Evaluate/type.cpp @@ -293,7 +293,7 @@ const semantics::DerivedTypeSpec *GetParentTypeSpec( } // Compares two derived type representations to see whether they both -// represent the "same type" in the sense of section 7.5.2.4. +// represent the "same type" in the sense of section F'2023 7.5.2.4. using SetOfDerivedTypePairs = std::set>; @@ -513,6 +513,19 @@ bool AreSameDerivedType( return AreSameDerivedType(x, y, false, false, inProgress); } +bool AreSameDerivedType( + const semantics::DerivedTypeSpec *x, const semantics::DerivedTypeSpec *y) { + return x == y || (x && y && AreSameDerivedType(*x, *y)); +} + +bool DynamicType::IsEquivalentTo(const DynamicType &that) const { + return category_ == that.category_ && kind_ == that.kind_ && + PointeeComparison(charLengthParamValue_, that.charLengthParamValue_) && + knownLength().has_value() == that.knownLength().has_value() && + (!knownLength() || *knownLength() == *that.knownLength()) && + AreSameDerivedType(derived_, that.derived_); +} + static bool AreCompatibleDerivedTypes(const semantics::DerivedTypeSpec *x, const semantics::DerivedTypeSpec *y, bool isPolymorphic, bool ignoreTypeParameterValues, bool ignoreLenTypeParameters) { diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 2c2866d590ae5a..ce16b2df54b050 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -3354,10 +3354,9 @@ void SubprogramMatchHelper::CheckDummyDataObject(const Symbol &symbol1, const DummyDataObject &obj2) { if (!CheckSameIntent(symbol1, symbol2, obj1.intent, obj2.intent)) { } else if (!CheckSameAttrs(symbol1, symbol2, obj1.attrs, obj2.attrs)) { - } else if (obj1.type.type() != obj2.type.type()) { + } else if (!obj1.type.type().IsEquivalentTo(obj2.type.type())) { Say(symbol1, symbol2, - "Dummy argument '%s' has type %s; the corresponding argument in the" - " interface body has type %s"_err_en_US, + "Dummy argument '%s' has type %s; the corresponding argument in the interface body has distinct type %s"_err_en_US, obj1.type.type().AsFortran(), obj2.type.type().AsFortran()); } else if (!ShapesAreCompatible(obj1, obj2)) { Say(symbol1, symbol2, diff --git a/flang/test/Semantics/separate-mp02.f90 b/flang/test/Semantics/separate-mp02.f90 index fd9c4c3cc18f98..39a469b6ccc09e 100644 --- a/flang/test/Semantics/separate-mp02.f90 +++ b/flang/test/Semantics/separate-mp02.f90 @@ -51,9 +51,9 @@ module subroutine s5(x, y) real :: y end module subroutine s6(x, y) - !ERROR: Dummy argument 'x' has type INTEGER(4); the corresponding argument in the interface body has type REAL(4) + !ERROR: Dummy argument 'x' has type INTEGER(4); the corresponding argument in the interface body has distinct type REAL(4) integer :: x - !ERROR: Dummy argument 'y' has type REAL(8); the corresponding argument in the interface body has type REAL(4) + !ERROR: Dummy argument 'y' has type REAL(8); the corresponding argument in the interface body has distinct type REAL(4) real(8) :: y end module subroutine s7(x, y, z) @@ -72,10 +72,10 @@ module subroutine s8(x, y, z) end module subroutine s9(x, y, z, w) character(len=4) :: x - !ERROR: Dummy argument 'y' has type CHARACTER(KIND=1,LEN=5_8); the corresponding argument in the interface body has type CHARACTER(KIND=1,LEN=4_8) + !ERROR: Dummy 
argument 'y' has type CHARACTER(KIND=1,LEN=5_8); the corresponding argument in the interface body has distinct type CHARACTER(KIND=1,LEN=4_8) character(len=5) :: y character(len=*) :: z - !ERROR: Dummy argument 'w' has type CHARACTER(KIND=1,LEN=4_8); the corresponding argument in the interface body has type CHARACTER(KIND=1,LEN=*) + !ERROR: Dummy argument 'w' has type CHARACTER(KIND=1,LEN=4_8); the corresponding argument in the interface body has distinct type CHARACTER(KIND=1,LEN=*) character(len=4) :: w end end @@ -330,7 +330,7 @@ module subroutine sub1(s) character(len=-1) s ! ok end subroutine module subroutine sub2(s) - !ERROR: Dummy argument 's' has type CHARACTER(KIND=1,LEN=1_8); the corresponding argument in the interface body has type CHARACTER(KIND=1,LEN=0_8) + !ERROR: Dummy argument 's' has type CHARACTER(KIND=1,LEN=1_8); the corresponding argument in the interface body has distinct type CHARACTER(KIND=1,LEN=0_8) character(len=1) s end subroutine end submodule diff --git a/flang/test/Semantics/separate-mp03.f90 b/flang/test/Semantics/separate-mp03.f90 index 33bf1cf8e414fd..1bbeced44a4f7a 100644 --- a/flang/test/Semantics/separate-mp03.f90 +++ b/flang/test/Semantics/separate-mp03.f90 @@ -74,7 +74,7 @@ pure module subroutine s2 end interface contains integer module function f1(x) - !ERROR: Dummy argument 'x' has type INTEGER(4); the corresponding argument in the interface body has type REAL(4) + !ERROR: Dummy argument 'x' has type INTEGER(4); the corresponding argument in the interface body has distinct type REAL(4) integer, intent(in) :: x f1 = x end function diff --git a/flang/test/Semantics/separate-mp06.f90 b/flang/test/Semantics/separate-mp06.f90 new file mode 100644 index 00000000000000..9c76466d726dc6 --- /dev/null +++ b/flang/test/Semantics/separate-mp06.f90 @@ -0,0 +1,98 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! Structural equivalence of derived type definitions +module m + interface + module subroutine s1(x) + type :: nonseq + integer :: n + end type + type(nonseq), intent(in) :: x + end subroutine + module subroutine s2(x) + type :: seq + sequence + integer :: n + end type + type(seq), intent(in) :: x + end subroutine + module subroutine s3(x) + type :: chlen + sequence + character(2) :: s + end type + type(chlen), intent(in) :: x + end subroutine + module subroutine s4(x) + !ERROR: A sequence type may not have type parameters + type :: pdt(k) + integer, kind :: k + sequence + real(k) :: a + end type + type(pdt(4)), intent(in) :: x + end subroutine + end interface +end module + +submodule(m) sm + contains + module subroutine s1(x) + type :: nonseq + integer :: n + end type + !ERROR: Dummy argument 'x' has type nonseq; the corresponding argument in the interface body has distinct type nonseq + type(nonseq), intent(in) :: x + end subroutine + module subroutine s2(x) ! ok + type :: seq + sequence + integer :: n + end type + type(seq), intent(in) :: x + end subroutine + module subroutine s3(x) + type :: chlen + sequence + character(3) :: s ! 
note: length is 3, not 2
+    end type
+    !ERROR: Dummy argument 'x' has type chlen; the corresponding argument in the interface body has distinct type chlen
+    type(chlen), intent(in) :: x
+  end subroutine
+  module subroutine s4(x)
+    !ERROR: A sequence type may not have type parameters
+    type :: pdt(k)
+      integer, kind :: k
+      sequence
+      real(k) :: a
+    end type
+    !ERROR: Dummy argument 'x' has type pdt(k=4_4); the corresponding argument in the interface body has distinct type pdt(k=4_4)
+    type(pdt(4)), intent(in) :: x
+  end subroutine
+end submodule
+
+program main
+  use m
+  type :: nonseq
+    integer :: n
+  end type
+  type :: seq
+    sequence
+    integer :: n
+  end type
+  type :: chlen
+    sequence
+    character(2) :: s
+  end type
+  !ERROR: A sequence type may not have type parameters
+  type :: pdt(k)
+    integer, kind :: k
+    sequence
+    real(k) :: a
+  end type
+  !ERROR: Actual argument type 'nonseq' is not compatible with dummy argument type 'nonseq'
+  call s1(nonseq(1))
+  call s2(seq(1)) ! ok
+  call s3(chlen('ab')) ! ok, matches interface
+  !ERROR: Actual argument type 'pdt(k=4_4)' is not compatible with dummy argument type 'pdt(k=4_4)'
+  call s4(pdt(4)(3.14159))
+end program

From 4b3cd379cce3f455bf3c8677ca7a5be6e708a4ce Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani
Date: Mon, 30 Oct 2023 16:52:17 -0700
Subject: [PATCH 099/144] [lldb] Make use of Scripted{Python,}Interface for
 ScriptedThreadPlan (#70392)

This patch makes ScriptedThreadPlan conform to the ScriptedInterface &
ScriptedPythonInterface facilities by introducing 2 classes,
ScriptedThreadPlanInterface & ScriptedThreadPlanPythonInterface.

This allows us to get rid of every ScriptedThreadPlan-specific SWIG
method and re-use the same affordances as the other scripting
interfaces, like Scripted{Process,Thread,Platform} & OperatingSystem.

To do so, this adds new transformer methods for `ThreadPlan`, `Stream` &
`Event`, to allow the bijection between C++ objects and their Python
counterparts.
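For illustration, a condensed sketch of how a thread plan callback is now
routed through the new interface (the error handling in the actual
ThreadPlanPython changes below is more involved):

    // m_interface is the lldb::ScriptedThreadPlanInterfaceSP held by
    // ThreadPlanPython; the interface method returns llvm::Expected<bool>.
    bool ThreadPlanPython::ShouldStop(Event *event_ptr) {
      if (!m_interface)
        return true;
      llvm::Expected<bool> should_stop = m_interface->ShouldStop(event_ptr);
      if (!should_stop) {
        llvm::consumeError(should_stop.takeError());
        return true; // fall back to stopping when the script errors out
      }
      return *should_stop;
    }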
Signed-off-by: Med Ismail Bennani --- lldb/bindings/python/python-swigsafecast.swig | 13 +- lldb/bindings/python/python-wrapper.swig | 153 +++--------------- lldb/include/lldb/API/SBEvent.h | 4 +- lldb/include/lldb/API/SBStream.h | 9 ++ .../Interfaces/ScriptedInterface.h | 4 +- .../Interfaces/ScriptedThreadPlanInterface.h | 40 +++++ .../lldb/Interpreter/ScriptInterpreter.h | 56 ++----- lldb/include/lldb/Target/ThreadPlanPython.h | 2 + lldb/include/lldb/lldb-forward.h | 3 + lldb/source/Interpreter/ScriptInterpreter.cpp | 13 ++ .../Python/Interfaces/CMakeLists.txt | 1 + .../ScriptedPlatformPythonInterface.cpp | 2 + .../Interfaces/ScriptedPythonInterface.cpp | 34 +++- .../Interfaces/ScriptedPythonInterface.h | 20 +++ .../ScriptedThreadPlanPythonInterface.cpp | 92 +++++++++++ .../ScriptedThreadPlanPythonInterface.h | 44 +++++ .../ScriptedThreadPythonInterface.cpp | 1 + .../Python/SWIGPythonBridge.h | 21 +-- .../Python/ScriptInterpreterPython.cpp | 122 +------------- .../Python/ScriptInterpreterPythonImpl.h | 28 +--- lldb/source/Target/ThreadPlanPython.cpp | 97 ++++++----- .../functionalities/step_scripted/Steps.py | 4 +- .../Python/PythonTestSuite.cpp | 45 +++--- 23 files changed, 401 insertions(+), 407 deletions(-) create mode 100644 lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h create mode 100644 lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp create mode 100644 lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.h diff --git a/lldb/bindings/python/python-swigsafecast.swig b/lldb/bindings/python/python-swigsafecast.swig index d5ea5148727134..fba3a77d8f2df4 100644 --- a/lldb/bindings/python/python-swigsafecast.swig +++ b/lldb/bindings/python/python-swigsafecast.swig @@ -37,10 +37,6 @@ PythonObject SWIGBridge::ToSWIGWrapper(const Status& status) { return ToSWIGHelper(new lldb::SBError(status), SWIGTYPE_p_lldb__SBError); } -PythonObject SWIGBridge::ToSWIGWrapper(std::unique_ptr stream_sb) { - return ToSWIGHelper(stream_sb.release(), SWIGTYPE_p_lldb__SBStream); -} - PythonObject SWIGBridge::ToSWIGWrapper(std::unique_ptr data_sb) { return ToSWIGHelper(data_sb.release(), SWIGTYPE_p_lldb__SBStructuredData); } @@ -115,9 +111,12 @@ SWIGBridge::ToSWIGWrapper(CommandReturnObject &cmd_retobj) { SWIGTYPE_p_lldb__SBCommandReturnObject); } -ScopedPythonObject SWIGBridge::ToSWIGWrapper(Event *event) { - return ScopedPythonObject(new lldb::SBEvent(event), - SWIGTYPE_p_lldb__SBEvent); +PythonObject SWIGBridge::ToSWIGWrapper(const Stream *s) { + return ToSWIGHelper(new lldb::SBStream(), SWIGTYPE_p_lldb__SBStream); +} + +PythonObject SWIGBridge::ToSWIGWrapper(Event *event) { + return ToSWIGHelper(new lldb::SBEvent(event), SWIGTYPE_p_lldb__SBEvent); } PythonObject SWIGBridge::ToSWIGWrapper( diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index 17bc7b1f219870..5c28d652824073 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -229,133 +229,6 @@ PythonObject lldb_private::python::SWIGBridge::LLDBSwigPythonCreateCommandObject return pfunc(SWIGBridge::ToSWIGWrapper(std::move(debugger_sp)), dict); } -PythonObject lldb_private::python::SWIGBridge::LLDBSwigPythonCreateScriptedThreadPlan( - const char *python_class_name, const char *session_dictionary_name, - const lldb_private::StructuredDataImpl &args_impl, - std::string &error_string, const lldb::ThreadPlanSP &thread_plan_sp) { - if (python_class_name == NULL 
|| python_class_name[0] == '\0' || - !session_dictionary_name) - return PythonObject(); - - PyErr_Cleaner py_err_cleaner(true); - - auto dict = PythonModule::MainModule().ResolveName( - session_dictionary_name); - auto pfunc = PythonObject::ResolveNameWithDictionary( - python_class_name, dict); - - if (!pfunc.IsAllocated()) { - error_string.append("could not find script class: "); - error_string.append(python_class_name); - return PythonObject(); - } - - PythonObject tp_arg = SWIGBridge::ToSWIGWrapper(thread_plan_sp); - - llvm::Expected arg_info = pfunc.GetArgInfo(); - if (!arg_info) { - llvm::handleAllErrors( - arg_info.takeError(), - [&](PythonException &E) { error_string.append(E.ReadBacktrace()); }, - [&](const llvm::ErrorInfoBase &E) { - error_string.append(E.message()); - }); - return PythonObject(); - } - - PythonObject result = {}; - auto args_sb = std::unique_ptr(new lldb::SBStructuredData(args_impl)); - if (arg_info.get().max_positional_args == 2) { - if (args_sb->IsValid()) { - error_string.assign( - "args passed, but __init__ does not take an args dictionary"); - return PythonObject(); - } - result = pfunc(tp_arg, dict); - } else if (arg_info.get().max_positional_args >= 3) { - result = pfunc(tp_arg, SWIGBridge::ToSWIGWrapper(std::move(args_sb)), dict); - } else { - error_string.assign("wrong number of arguments in __init__, should be 2 or " - "3 (not including self)"); - return PythonObject(); - } - - // FIXME: At this point we should check that the class we found supports all - // the methods that we need. - - return result; -} - -bool lldb_private::python::SWIGBridge::LLDBSWIGPythonCallThreadPlan( - void *implementor, const char *method_name, lldb_private::Event *event, - bool &got_error) { - got_error = false; - - PyErr_Cleaner py_err_cleaner(false); - PythonObject self(PyRefType::Borrowed, static_cast(implementor)); - auto pfunc = self.ResolveName(method_name); - - if (!pfunc.IsAllocated()) - return false; - - PythonObject result; - if (event != nullptr) { - ScopedPythonObject event_arg = SWIGBridge::ToSWIGWrapper(event); - result = pfunc(event_arg.obj()); - } else - result = pfunc(); - - if (PyErr_Occurred()) { - got_error = true; - printf("Return value was neither false nor true for call to %s.\n", - method_name); - PyErr_Print(); - return false; - } - - if (result.get() == Py_True) - return true; - else if (result.get() == Py_False) - return false; - - // Somebody returned the wrong thing... 
- got_error = true; - printf("Wrong return value type for call to %s.\n", method_name); - return false; -} - -bool lldb_private::python::SWIGBridge::LLDBSWIGPythonCallThreadPlan( - void *implementor, const char *method_name, lldb_private::Stream *stream, - bool &got_error) { - got_error = false; - - PyErr_Cleaner py_err_cleaner(false); - PythonObject self(PyRefType::Borrowed, static_cast(implementor)); - auto pfunc = self.ResolveName(method_name); - - if (!pfunc.IsAllocated()) - return false; - - auto *sb_stream = new lldb::SBStream(); - PythonObject sb_stream_arg = - SWIGBridge::ToSWIGWrapper(std::unique_ptr(sb_stream)); - - PythonObject result; - result = pfunc(sb_stream_arg); - - if (PyErr_Occurred()) { - printf("Error occured for call to %s.\n", - method_name); - PyErr_Print(); - got_error = true; - return false; - } - if (stream) - stream->PutCString(sb_stream->GetData()); - return true; - -} - PythonObject lldb_private::python::SWIGBridge::LLDBSwigPythonCreateScriptedBreakpointResolver( const char *python_class_name, const char *session_dictionary_name, const StructuredDataImpl &args_impl, @@ -502,7 +375,7 @@ bool lldb_private::python::SWIGBridge::LLDBSwigPythonStopHookCallHandleStop( auto *sb_stream = new lldb::SBStream(); PythonObject sb_stream_arg = - SWIGBridge::ToSWIGWrapper(std::unique_ptr(sb_stream)); + SWIGBridge::ToSWIGWrapper(stream.get()); PythonObject result = pfunc(SWIGBridge::ToSWIGWrapper(std::move(exc_ctx_sp)), sb_stream_arg); @@ -753,6 +626,30 @@ void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBError(PyObject * data return sb_ptr; } +void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBEvent(PyObject * data) { + lldb::SBEvent *sb_ptr = nullptr; + + int valid_cast = + SWIG_ConvertPtr(data, (void **)&sb_ptr, SWIGTYPE_p_lldb__SBEvent, 0); + + if (valid_cast == -1) + return NULL; + + return sb_ptr; +} + +void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBStream(PyObject * data) { + lldb::SBStream *sb_ptr = nullptr; + + int valid_cast = + SWIG_ConvertPtr(data, (void **)&sb_ptr, SWIGTYPE_p_lldb__SBStream, 0); + + if (valid_cast == -1) + return NULL; + + return sb_ptr; +} + void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBValue(PyObject * data) { lldb::SBValue *sb_ptr = NULL; diff --git a/lldb/include/lldb/API/SBEvent.h b/lldb/include/lldb/API/SBEvent.h index cc116766e85f4a..85b401ca8cc100 100644 --- a/lldb/include/lldb/API/SBEvent.h +++ b/lldb/include/lldb/API/SBEvent.h @@ -15,6 +15,7 @@ #include namespace lldb_private { +class ScriptInterpreter; namespace python { class SWIGBridge; } @@ -73,11 +74,12 @@ class LLDB_API SBEvent { friend class SBThread; friend class SBWatchpoint; + friend class lldb_private::ScriptInterpreter; friend class lldb_private::python::SWIGBridge; SBEvent(lldb::EventSP &event_sp); - SBEvent(lldb_private::Event *event_sp); + SBEvent(lldb_private::Event *event); lldb::EventSP &GetSP() const; diff --git a/lldb/include/lldb/API/SBStream.h b/lldb/include/lldb/API/SBStream.h index 0e33f05b69916f..ee329737d594b5 100644 --- a/lldb/include/lldb/API/SBStream.h +++ b/lldb/include/lldb/API/SBStream.h @@ -13,6 +13,13 @@ #include "lldb/API/SBDefines.h" +namespace lldb_private { +class ScriptInterpreter; +namespace python { +class SWIGBridge; +} +} // namespace lldb_private + namespace lldb { class LLDB_API SBStream { @@ -101,6 +108,8 @@ class LLDB_API SBStream { friend class SBValue; friend class SBWatchpoint; + friend class lldb_private::ScriptInterpreter; + lldb_private::Stream *operator->(); lldb_private::Stream *get(); diff 
--git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h index e4816352daa5db..fc0e488da69829 100644 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h @@ -10,7 +10,6 @@ #define LLDB_INTERPRETER_INTERFACES_SCRIPTEDINTERFACE_H #include "lldb/Core/StructuredDataImpl.h" -#include "lldb/Target/ExecutionContext.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/UnimplementedError.h" @@ -50,7 +49,8 @@ class ScriptedInterface { } template - bool CheckStructuredDataObject(llvm::StringRef caller, T obj, Status &error) { + static bool CheckStructuredDataObject(llvm::StringRef caller, T obj, + Status &error) { if (!obj) return ErrorWithMessage(caller, "Null Structured Data object", error); diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h new file mode 100644 index 00000000000000..4dadda4d978985 --- /dev/null +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h @@ -0,0 +1,40 @@ +//===-- ScriptedThreadPlanInterface.h ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_INTERPRETER_INTERFACES_SCRIPTEDTHREADPLANINTERFACE_H +#define LLDB_INTERPRETER_INTERFACES_SCRIPTEDTHREADPLANINTERFACE_H + +#include "lldb/lldb-private.h" + +#include "ScriptedInterface.h" + +namespace lldb_private { +class ScriptedThreadPlanInterface : public ScriptedInterface { +public: + virtual llvm::Expected + CreatePluginObject(llvm::StringRef class_name, + lldb::ThreadPlanSP thread_plan_sp, + const StructuredDataImpl &args_sp) { + llvm_unreachable("unimplemented!"); + } + + virtual llvm::Expected ExplainsStop(Event *event) { return true; } + + virtual llvm::Expected ShouldStop(Event *event) { return true; } + + virtual llvm::Expected IsStale() { return true; }; + + virtual lldb::StateType GetRunState() { return lldb::eStateStepping; } + + virtual llvm::Expected GetStopDescription(lldb_private::Stream *s) { + return true; + } +}; +} // namespace lldb_private + +#endif // LLDB_INTERPRETER_INTERFACES_SCRIPTEDTHREADPLANINTERFACE_H diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h index 0146eeb8626200..7e2a7286e20422 100644 --- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h +++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h @@ -13,8 +13,10 @@ #include "lldb/API/SBBreakpoint.h" #include "lldb/API/SBData.h" #include "lldb/API/SBError.h" +#include "lldb/API/SBEvent.h" #include "lldb/API/SBLaunchInfo.h" #include "lldb/API/SBMemoryRegionInfo.h" +#include "lldb/API/SBStream.h" #include "lldb/Breakpoint/BreakpointOptions.h" #include "lldb/Core/PluginInterface.h" #include "lldb/Core/SearchFilter.h" @@ -25,6 +27,7 @@ #include "lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h" #include "lldb/Interpreter/Interfaces/ScriptedProcessInterface.h" #include "lldb/Interpreter/Interfaces/ScriptedThreadInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h" #include "lldb/Interpreter/ScriptObject.h" #include 
"lldb/Utility/Broadcaster.h" #include "lldb/Utility/Status.h" @@ -253,50 +256,6 @@ class ScriptInterpreter : public PluginInterface { return lldb::ValueObjectListSP(); } - virtual StructuredData::ObjectSP - CreateScriptedThreadPlan(const char *class_name, - const StructuredDataImpl &args_data, - std::string &error_str, - lldb::ThreadPlanSP thread_plan_sp) { - return StructuredData::ObjectSP(); - } - - virtual bool - ScriptedThreadPlanExplainsStop(StructuredData::ObjectSP implementor_sp, - Event *event, bool &script_error) { - script_error = true; - return true; - } - - virtual bool - ScriptedThreadPlanShouldStop(StructuredData::ObjectSP implementor_sp, - Event *event, bool &script_error) { - script_error = true; - return true; - } - - virtual bool - ScriptedThreadPlanIsStale(StructuredData::ObjectSP implementor_sp, - bool &script_error) { - script_error = true; - return true; - } - - virtual lldb::StateType - ScriptedThreadPlanGetRunState(StructuredData::ObjectSP implementor_sp, - bool &script_error) { - script_error = true; - return lldb::eStateStepping; - } - - virtual bool - ScriptedThreadPlanGetStopDescription(StructuredData::ObjectSP implementor_sp, - lldb_private::Stream *stream, - bool &script_error) { - script_error = true; - return false; - } - virtual StructuredData::GenericSP CreateScriptedBreakpointResolver(const char *class_name, const StructuredDataImpl &args_data, @@ -566,6 +525,11 @@ class ScriptInterpreter : public PluginInterface { return std::make_shared(); } + virtual lldb::ScriptedThreadPlanInterfaceSP + CreateScriptedThreadPlanInterface() { + return std::make_shared(); + } + virtual lldb::OperatingSystemInterfaceSP CreateOperatingSystemInterface() { return std::make_shared(); } @@ -584,6 +548,10 @@ class ScriptInterpreter : public PluginInterface { Status GetStatusFromSBError(const lldb::SBError &error) const; + Event *GetOpaqueTypeFromSBEvent(const lldb::SBEvent &event) const; + + Stream *GetOpaqueTypeFromSBStream(const lldb::SBStream &stream) const; + lldb::BreakpointSP GetOpaqueTypeFromSBBreakpoint(const lldb::SBBreakpoint &breakpoint) const; diff --git a/lldb/include/lldb/Target/ThreadPlanPython.h b/lldb/include/lldb/Target/ThreadPlanPython.h index 64854d66b8f258..da106faf951db1 100644 --- a/lldb/include/lldb/Target/ThreadPlanPython.h +++ b/lldb/include/lldb/Target/ThreadPlanPython.h @@ -13,6 +13,7 @@ #include #include "lldb/Core/StructuredDataImpl.h" +#include "lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h" #include "lldb/Target/Process.h" #include "lldb/Target/StopInfo.h" #include "lldb/Target/Target.h" @@ -70,6 +71,7 @@ class ThreadPlanPython : public ThreadPlan { StreamString m_stop_description; // Cache the stop description here bool m_did_push; bool m_stop_others; + lldb::ScriptedThreadPlanInterfaceSP m_interface; ThreadPlanPython(const ThreadPlanPython &) = delete; const ThreadPlanPython &operator=(const ThreadPlanPython &) = delete; diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index aa099d4abc3b09..6138e6fe5a60b4 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -185,6 +185,7 @@ class ScriptedMetadata; class ScriptedPlatformInterface; class ScriptedProcessInterface; class ScriptedThreadInterface; +class ScriptedThreadPlanInterface; class ScriptedSyntheticChildren; class SearchFilter; class Section; @@ -393,6 +394,8 @@ typedef std::unique_ptr ScriptedProcessInterfaceUP; typedef std::shared_ptr ScriptedThreadInterfaceSP; +typedef std::shared_ptr + 
ScriptedThreadPlanInterfaceSP; typedef std::shared_ptr SectionSP; typedef std::unique_ptr SectionListUP; typedef std::weak_ptr SectionWP; diff --git a/lldb/source/Interpreter/ScriptInterpreter.cpp b/lldb/source/Interpreter/ScriptInterpreter.cpp index fb3fa74d0b9780..aee2ec94d7979a 100644 --- a/lldb/source/Interpreter/ScriptInterpreter.cpp +++ b/lldb/source/Interpreter/ScriptInterpreter.cpp @@ -104,6 +104,19 @@ ScriptInterpreter::GetStatusFromSBError(const lldb::SBError &error) const { return Status(); } +Event * +ScriptInterpreter::GetOpaqueTypeFromSBEvent(const lldb::SBEvent &event) const { + return event.m_opaque_ptr; +} + +Stream *ScriptInterpreter::GetOpaqueTypeFromSBStream( + const lldb::SBStream &stream) const { + if (stream.m_opaque_up) + return const_cast(stream).m_opaque_up.get(); + + return nullptr; +} + std::optional ScriptInterpreter::GetOpaqueTypeFromSBMemoryRegionInfo( const lldb::SBMemoryRegionInfo &mem_region) const { diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt index b22abc49c92a9a..c60e4bb503a371 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt @@ -24,6 +24,7 @@ add_lldb_library(lldbPluginScriptInterpreterPythonInterfaces ScriptedPythonInterface.cpp ScriptedProcessPythonInterface.cpp ScriptedThreadPythonInterface.cpp + ScriptedThreadPlanPythonInterface.cpp ScriptedPlatformPythonInterface.cpp LINK_LIBS diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp index 9ba4731032bd35..6e93bec80056ee 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp @@ -20,6 +20,8 @@ #include "../ScriptInterpreterPythonImpl.h" #include "ScriptedPlatformPythonInterface.h" +#include "lldb/Target/ExecutionContext.h" + using namespace lldb; using namespace lldb_private; using namespace lldb_private::python; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp index 6f22503b279ca6..7d072212676e13 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp @@ -26,6 +26,15 @@ ScriptedPythonInterface::ScriptedPythonInterface( ScriptInterpreterPythonImpl &interpreter) : ScriptedInterface(), m_interpreter(interpreter) {} +template <> +void ScriptedPythonInterface::ReverseTransform( + lldb_private::Stream *&original_arg, python::PythonObject transformed_arg, + Status &error) { + Stream *s = ExtractValueFromPythonObject(transformed_arg, error); + *original_arg = *s; + original_arg->PutCString(static_cast(s)->GetData()); +} + template <> StructuredData::ArraySP ScriptedPythonInterface::ExtractValueFromPythonObject( @@ -48,12 +57,33 @@ Status ScriptedPythonInterface::ExtractValueFromPythonObject( if (lldb::SBError *sb_error = reinterpret_cast( python::LLDBSWIGPython_CastPyObjectToSBError(p.get()))) return m_interpreter.GetStatusFromSBError(*sb_error); - else - error.SetErrorString("Couldn't cast lldb::SBError to lldb::Status."); + 
error.SetErrorString("Couldn't cast lldb::SBError to lldb::Status."); return {}; } +template <> +Event *ScriptedPythonInterface::ExtractValueFromPythonObject( + python::PythonObject &p, Status &error) { + if (lldb::SBEvent *sb_event = reinterpret_cast( + python::LLDBSWIGPython_CastPyObjectToSBEvent(p.get()))) + return m_interpreter.GetOpaqueTypeFromSBEvent(*sb_event); + error.SetErrorString("Couldn't cast lldb::SBEvent to lldb_private::Event."); + + return nullptr; +} + +template <> +Stream *ScriptedPythonInterface::ExtractValueFromPythonObject( + python::PythonObject &p, Status &error) { + if (lldb::SBStream *sb_stream = reinterpret_cast( + python::LLDBSWIGPython_CastPyObjectToSBStream(p.get()))) + return m_interpreter.GetOpaqueTypeFromSBStream(*sb_stream); + error.SetErrorString("Couldn't cast lldb::SBStream to lldb_private::Stream."); + + return nullptr; +} + template <> lldb::DataExtractorSP ScriptedPythonInterface::ExtractValueFromPythonObject( diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h index 7af98163970999..cc760938c89959 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h @@ -224,6 +224,10 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { return python::SWIGBridge::ToSWIGWrapper(arg); } + python::PythonObject Transform(lldb::ThreadPlanSP arg) { + return python::SWIGBridge::ToSWIGWrapper(arg); + } + python::PythonObject Transform(lldb::ProcessAttachInfoSP arg) { return python::SWIGBridge::ToSWIGWrapper(arg); } @@ -232,6 +236,14 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { return python::SWIGBridge::ToSWIGWrapper(arg); } + python::PythonObject Transform(Event *arg) { + return python::SWIGBridge::ToSWIGWrapper(arg); + } + + python::PythonObject Transform(Stream *arg) { + return python::SWIGBridge::ToSWIGWrapper(arg); + } + python::PythonObject Transform(lldb::DataExtractorSP arg) { return python::SWIGBridge::ToSWIGWrapper(arg); } @@ -329,6 +341,14 @@ template <> Status ScriptedPythonInterface::ExtractValueFromPythonObject( python::PythonObject &p, Status &error); +template <> +Event *ScriptedPythonInterface::ExtractValueFromPythonObject( + python::PythonObject &p, Status &error); + +template <> +Stream *ScriptedPythonInterface::ExtractValueFromPythonObject( + python::PythonObject &p, Status &error); + template <> lldb::BreakpointSP ScriptedPythonInterface::ExtractValueFromPythonObject( diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp new file mode 100644 index 00000000000000..df9f7db6f62b02 --- /dev/null +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp @@ -0,0 +1,92 @@ +//===-- ScriptedThreadPlanPythonInterface.cpp -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Host/Config.h" +#include "lldb/Utility/Log.h" +#include "lldb/lldb-enumerations.h" + +#if LLDB_ENABLE_PYTHON + +// LLDB Python header must be included first +#include "../lldb-python.h" + +#include "../SWIGPythonBridge.h" +#include "../ScriptInterpreterPythonImpl.h" +#include "ScriptedThreadPlanPythonInterface.h" + +using namespace lldb; +using namespace lldb_private; +using namespace lldb_private::python; + +ScriptedThreadPlanPythonInterface::ScriptedThreadPlanPythonInterface( + ScriptInterpreterPythonImpl &interpreter) + : ScriptedThreadPlanInterface(), ScriptedPythonInterface(interpreter) {} + +llvm::Expected +ScriptedThreadPlanPythonInterface::CreatePluginObject( + const llvm::StringRef class_name, lldb::ThreadPlanSP thread_plan_sp, + const StructuredDataImpl &args_sp) { + return ScriptedPythonInterface::CreatePluginObject(class_name, nullptr, + thread_plan_sp, args_sp); +} + +llvm::Expected +ScriptedThreadPlanPythonInterface::ExplainsStop(Event *event) { + Status error; + StructuredData::ObjectSP obj = Dispatch("explains_stop", error, event); + + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + return error.ToError(); + + return obj->GetBooleanValue(); +} + +llvm::Expected +ScriptedThreadPlanPythonInterface::ShouldStop(Event *event) { + Status error; + StructuredData::ObjectSP obj = Dispatch("should_stop", error, event); + + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + return error.ToError(); + + return obj->GetBooleanValue(); +} + +llvm::Expected ScriptedThreadPlanPythonInterface::IsStale() { + Status error; + StructuredData::ObjectSP obj = Dispatch("is_stale", error); + + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + return error.ToError(); + + return obj->GetBooleanValue(); +} + +lldb::StateType ScriptedThreadPlanPythonInterface::GetRunState() { + Status error; + StructuredData::ObjectSP obj = Dispatch("should_step", error); + + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + return lldb::eStateStepping; + + return static_cast(obj->GetUnsignedIntegerValue( + static_cast(lldb::eStateStepping))); +} + +llvm::Expected +ScriptedThreadPlanPythonInterface::GetStopDescription(lldb_private::Stream *s) { + Status error; + Dispatch("stop_description", error, s); + + if (error.Fail()) + return error.ToError(); + + return true; +} + +#endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.h new file mode 100644 index 00000000000000..2eb986e0282f0f --- /dev/null +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.h @@ -0,0 +1,44 @@ +//===-- ScriptedThreadPlanPythonInterface.h ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDTHREADPLANPYTHONINTERFACE_H +#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDTHREADPLANPYTHONINTERFACE_H + +#include "lldb/Host/Config.h" + +#if LLDB_ENABLE_PYTHON + +#include "ScriptedPythonInterface.h" +#include "lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h" +#include + +namespace lldb_private { +class ScriptedThreadPlanPythonInterface : public ScriptedThreadPlanInterface, + public ScriptedPythonInterface { +public: + ScriptedThreadPlanPythonInterface(ScriptInterpreterPythonImpl &interpreter); + + llvm::Expected + CreatePluginObject(const llvm::StringRef class_name, + lldb::ThreadPlanSP thread_plan_sp, + const StructuredDataImpl &args_sp) override; + + llvm::Expected ExplainsStop(Event *event) override; + + llvm::Expected ShouldStop(Event *event) override; + + llvm::Expected IsStale() override; + + lldb::StateType GetRunState() override; + + llvm::Expected GetStopDescription(lldb_private::Stream *s) override; +}; +} // namespace lldb_private + +#endif // LLDB_ENABLE_PYTHON +#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDTHREADPLANPYTHONINTERFACE_H diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp index 18e268527eb2fb..ba2ec0e78e9f53 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/Host/Config.h" +#include "lldb/Target/ExecutionContext.h" #include "lldb/Utility/Log.h" #include "lldb/lldb-enumerations.h" diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h index 7cdd5577919ba8..41f3a80a02b13b 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h @@ -96,12 +96,13 @@ class SWIGBridge { static PythonObject ToSWIGWrapper(lldb::ExecutionContextRefSP ctx_sp); static PythonObject ToSWIGWrapper(const TypeSummaryOptions &summary_options); static PythonObject ToSWIGWrapper(const SymbolContext &sym_ctx); + static PythonObject ToSWIGWrapper(const Stream *stream); + static PythonObject ToSWIGWrapper(Event *event); static PythonObject ToSWIGWrapper(lldb::ProcessAttachInfoSP attach_info_sp); static PythonObject ToSWIGWrapper(lldb::ProcessLaunchInfoSP launch_info_sp); static PythonObject ToSWIGWrapper(lldb::DataExtractorSP data_extractor_sp); - static PythonObject ToSWIGWrapper(std::unique_ptr stream_sb); static PythonObject ToSWIGWrapper(std::unique_ptr data_sb); static PythonObject @@ -111,7 +112,6 @@ class SWIGBridge { static python::ScopedPythonObject ToSWIGWrapper(CommandReturnObject &cmd_retobj); - static python::ScopedPythonObject ToSWIGWrapper(Event *event); // These prototypes are the Pythonic implementations of the required // callbacks. Although these are scripting-language specific, their definition // depends on the public API. 
@@ -146,21 +146,6 @@ class SWIGBridge { const char *session_dictionary_name, lldb::DebuggerSP debugger_sp); - static python::PythonObject LLDBSwigPythonCreateScriptedThreadPlan( - const char *python_class_name, const char *session_dictionary_name, - const StructuredDataImpl &args_data, std::string &error_string, - const lldb::ThreadPlanSP &thread_plan_sp); - - static bool LLDBSWIGPythonCallThreadPlan(void *implementor, - const char *method_name, - lldb_private::Event *event_sp, - bool &got_error); - - static bool LLDBSWIGPythonCallThreadPlan(void *implementor, - const char *method_name, - lldb_private::Stream *stream, - bool &got_error); - static python::PythonObject LLDBSwigPythonCreateScriptedBreakpointResolver( const char *python_class_name, const char *session_dictionary_name, const StructuredDataImpl &args, const lldb::BreakpointSP &bkpt_sp); @@ -262,6 +247,8 @@ void *LLDBSWIGPython_CastPyObjectToSBBreakpoint(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBAttachInfo(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBLaunchInfo(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBError(PyObject *data); +void *LLDBSWIGPython_CastPyObjectToSBEvent(PyObject *data); +void *LLDBSWIGPython_CastPyObjectToSBStream(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBValue(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo(PyObject *data); } // namespace python diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 953f8b3aba18f7..b71f856efda2ea 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -17,6 +17,7 @@ #include "Interfaces/OperatingSystemPythonInterface.h" #include "Interfaces/ScriptedPlatformPythonInterface.h" #include "Interfaces/ScriptedProcessPythonInterface.h" +#include "Interfaces/ScriptedThreadPlanPythonInterface.h" #include "Interfaces/ScriptedThreadPythonInterface.h" #include "PythonDataObjects.h" #include "PythonReadline.h" @@ -1535,6 +1536,11 @@ ScriptInterpreterPythonImpl::CreateScriptedThreadInterface() { return std::make_shared(*this); } +ScriptedThreadPlanInterfaceSP +ScriptInterpreterPythonImpl::CreateScriptedThreadPlanInterface() { + return std::make_shared(*this); +} + OperatingSystemInterfaceSP ScriptInterpreterPythonImpl::CreateOperatingSystemInterface() { return std::make_shared(*this); @@ -1551,122 +1557,6 @@ ScriptInterpreterPythonImpl::CreateStructuredDataFromScriptObject( return py_obj.CreateStructuredObject(); } -StructuredData::ObjectSP ScriptInterpreterPythonImpl::CreateScriptedThreadPlan( - const char *class_name, const StructuredDataImpl &args_data, - std::string &error_str, lldb::ThreadPlanSP thread_plan_sp) { - if (class_name == nullptr || class_name[0] == '\0') - return StructuredData::ObjectSP(); - - if (!thread_plan_sp.get()) - return {}; - - Debugger &debugger = thread_plan_sp->GetTarget().GetDebugger(); - ScriptInterpreterPythonImpl *python_interpreter = - GetPythonInterpreter(debugger); - - if (!python_interpreter) - return {}; - - Locker py_lock(this, - Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); - PythonObject ret_val = SWIGBridge::LLDBSwigPythonCreateScriptedThreadPlan( - class_name, python_interpreter->m_dictionary_name.c_str(), args_data, - error_str, thread_plan_sp); - if (!ret_val) - return {}; - - return StructuredData::ObjectSP( - new 
StructuredPythonObject(std::move(ret_val))); -} - -bool ScriptInterpreterPythonImpl::ScriptedThreadPlanExplainsStop( - StructuredData::ObjectSP implementor_sp, Event *event, bool &script_error) { - bool explains_stop = true; - StructuredData::Generic *generic = nullptr; - if (implementor_sp) - generic = implementor_sp->GetAsGeneric(); - if (generic) { - Locker py_lock(this, - Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); - explains_stop = SWIGBridge::LLDBSWIGPythonCallThreadPlan( - generic->GetValue(), "explains_stop", event, script_error); - if (script_error) - return true; - } - return explains_stop; -} - -bool ScriptInterpreterPythonImpl::ScriptedThreadPlanShouldStop( - StructuredData::ObjectSP implementor_sp, Event *event, bool &script_error) { - bool should_stop = true; - StructuredData::Generic *generic = nullptr; - if (implementor_sp) - generic = implementor_sp->GetAsGeneric(); - if (generic) { - Locker py_lock(this, - Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); - should_stop = SWIGBridge::LLDBSWIGPythonCallThreadPlan( - generic->GetValue(), "should_stop", event, script_error); - if (script_error) - return true; - } - return should_stop; -} - -bool ScriptInterpreterPythonImpl::ScriptedThreadPlanIsStale( - StructuredData::ObjectSP implementor_sp, bool &script_error) { - bool is_stale = true; - StructuredData::Generic *generic = nullptr; - if (implementor_sp) - generic = implementor_sp->GetAsGeneric(); - if (generic) { - Locker py_lock(this, - Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); - is_stale = SWIGBridge::LLDBSWIGPythonCallThreadPlan( - generic->GetValue(), "is_stale", (Event *)nullptr, script_error); - if (script_error) - return true; - } - return is_stale; -} - -lldb::StateType ScriptInterpreterPythonImpl::ScriptedThreadPlanGetRunState( - StructuredData::ObjectSP implementor_sp, bool &script_error) { - bool should_step = false; - StructuredData::Generic *generic = nullptr; - if (implementor_sp) - generic = implementor_sp->GetAsGeneric(); - if (generic) { - Locker py_lock(this, - Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); - should_step = SWIGBridge::LLDBSWIGPythonCallThreadPlan( - generic->GetValue(), "should_step", (Event *)nullptr, script_error); - if (script_error) - should_step = true; - } - if (should_step) - return lldb::eStateStepping; - return lldb::eStateRunning; -} - -bool -ScriptInterpreterPythonImpl::ScriptedThreadPlanGetStopDescription( - StructuredData::ObjectSP implementor_sp, lldb_private::Stream *stream, - bool &script_error) { - StructuredData::Generic *generic = nullptr; - if (implementor_sp) - generic = implementor_sp->GetAsGeneric(); - if (!generic) { - script_error = true; - return false; - } - Locker py_lock(this, - Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); - return SWIGBridge::LLDBSWIGPythonCallThreadPlan( - generic->GetValue(), "stop_description", stream, script_error); -} - - StructuredData::GenericSP ScriptInterpreterPythonImpl::CreateScriptedBreakpointResolver( const char *class_name, const StructuredDataImpl &args_data, diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h index a33499816d8d38..da8e3a63d08470 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h @@ -77,34 +77,9 @@ class ScriptInterpreterPythonImpl : public 
ScriptInterpreterPython { StructuredData::GenericSP CreateScriptCommandObject(const char *class_name) override; - StructuredData::ObjectSP - CreateScriptedThreadPlan(const char *class_name, - const StructuredDataImpl &args_data, - std::string &error_str, - lldb::ThreadPlanSP thread_plan) override; - StructuredData::ObjectSP CreateStructuredDataFromScriptObject(ScriptObject obj) override; - bool ScriptedThreadPlanExplainsStop(StructuredData::ObjectSP implementor_sp, - Event *event, - bool &script_error) override; - - bool ScriptedThreadPlanShouldStop(StructuredData::ObjectSP implementor_sp, - Event *event, bool &script_error) override; - - bool ScriptedThreadPlanIsStale(StructuredData::ObjectSP implementor_sp, - bool &script_error) override; - - lldb::StateType - ScriptedThreadPlanGetRunState(StructuredData::ObjectSP implementor_sp, - bool &script_error) override; - - bool - ScriptedThreadPlanGetStopDescription(StructuredData::ObjectSP implementor_sp, - lldb_private::Stream *s, - bool &script_error) override; - StructuredData::GenericSP CreateScriptedBreakpointResolver(const char *class_name, const StructuredDataImpl &args_data, @@ -136,6 +111,9 @@ class ScriptInterpreterPythonImpl : public ScriptInterpreterPython { lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() override; + lldb::ScriptedThreadPlanInterfaceSP + CreateScriptedThreadPlanInterface() override; + lldb::OperatingSystemInterfaceSP CreateOperatingSystemInterface() override; StructuredData::ObjectSP diff --git a/lldb/source/Target/ThreadPlanPython.cpp b/lldb/source/Target/ThreadPlanPython.cpp index d6de6b3c3cf049..48b3e3a1bcab61 100644 --- a/lldb/source/Target/ThreadPlanPython.cpp +++ b/lldb/source/Target/ThreadPlanPython.cpp @@ -32,6 +32,23 @@ ThreadPlanPython::ThreadPlanPython(Thread &thread, const char *class_name, eVoteNoOpinion, eVoteNoOpinion), m_class_name(class_name), m_args_data(args_data), m_did_push(false), m_stop_others(false) { + ScriptInterpreter *interpreter = GetScriptInterpreter(); + if (!interpreter) { + SetPlanComplete(false); + // FIXME: error handling + return; + } + + m_interface = interpreter->CreateScriptedThreadPlanInterface(); + if (!m_interface) { + SetPlanComplete(false); + // FIXME: error handling + // error.SetErrorStringWithFormat( + // "ThreadPlanPython::%s () - ERROR: %s", __FUNCTION__, + // "Script interpreter couldn't create Scripted Thread Plan Interface"); + return; + } + SetIsControllingPlan(true); SetOkayToDiscard(true); SetPrivate(false); @@ -60,13 +77,14 @@ void ThreadPlanPython::DidPush() { // We set up the script side in DidPush, so that it can push other plans in // the constructor, and doesn't have to care about the details of DidPush. 
m_did_push = true; - if (!m_class_name.empty()) { - ScriptInterpreter *script_interp = GetScriptInterpreter(); - if (script_interp) { - m_implementation_sp = script_interp->CreateScriptedThreadPlan( - m_class_name.c_str(), m_args_data, m_error_str, - this->shared_from_this()); - } + if (m_interface) { + auto obj_or_err = m_interface->CreatePluginObject( + m_class_name, this->shared_from_this(), m_args_data); + if (!obj_or_err) { + m_error_str = llvm::toString(obj_or_err.takeError()); + SetPlanComplete(false); + } else + m_implementation_sp = *obj_or_err; } } @@ -77,14 +95,13 @@ bool ThreadPlanPython::ShouldStop(Event *event_ptr) { bool should_stop = true; if (m_implementation_sp) { - ScriptInterpreter *script_interp = GetScriptInterpreter(); - if (script_interp) { - bool script_error; - should_stop = script_interp->ScriptedThreadPlanShouldStop( - m_implementation_sp, event_ptr, script_error); - if (script_error) - SetPlanComplete(false); - } + auto should_stop_or_err = m_interface->ShouldStop(event_ptr); + if (!should_stop_or_err) { + LLDB_LOG_ERROR(GetLog(LLDBLog::Thread), should_stop_or_err.takeError(), + "Can't call ScriptedThreadPlan::ShouldStop."); + SetPlanComplete(false); + } else + should_stop = *should_stop_or_err; } return should_stop; } @@ -96,14 +113,13 @@ bool ThreadPlanPython::IsPlanStale() { bool is_stale = true; if (m_implementation_sp) { - ScriptInterpreter *script_interp = GetScriptInterpreter(); - if (script_interp) { - bool script_error; - is_stale = script_interp->ScriptedThreadPlanIsStale(m_implementation_sp, - script_error); - if (script_error) - SetPlanComplete(false); - } + auto is_stale_or_err = m_interface->IsStale(); + if (!is_stale_or_err) { + LLDB_LOG_ERROR(GetLog(LLDBLog::Thread), is_stale_or_err.takeError(), + "Can't call ScriptedThreadPlan::IsStale."); + SetPlanComplete(false); + } else + is_stale = *is_stale_or_err; } return is_stale; } @@ -115,14 +131,14 @@ bool ThreadPlanPython::DoPlanExplainsStop(Event *event_ptr) { bool explains_stop = true; if (m_implementation_sp) { - ScriptInterpreter *script_interp = GetScriptInterpreter(); - if (script_interp) { - bool script_error; - explains_stop = script_interp->ScriptedThreadPlanExplainsStop( - m_implementation_sp, event_ptr, script_error); - if (script_error) - SetPlanComplete(false); - } + auto explains_stop_or_error = m_interface->ExplainsStop(event_ptr); + if (!explains_stop_or_error) { + LLDB_LOG_ERROR(GetLog(LLDBLog::Thread), + explains_stop_or_error.takeError(), + "Can't call ScriptedThreadPlan::ExplainsStop."); + SetPlanComplete(false); + } else + explains_stop = *explains_stop_or_error; } return explains_stop; } @@ -150,14 +166,8 @@ lldb::StateType ThreadPlanPython::GetPlanRunState() { LLDB_LOGF(log, "%s called on Python Thread Plan: %s )", LLVM_PRETTY_FUNCTION, m_class_name.c_str()); lldb::StateType run_state = eStateRunning; - if (m_implementation_sp) { - ScriptInterpreter *script_interp = GetScriptInterpreter(); - if (script_interp) { - bool script_error; - run_state = script_interp->ScriptedThreadPlanGetRunState( - m_implementation_sp, script_error); - } - } + if (m_implementation_sp) + run_state = m_interface->GetRunState(); return run_state; } @@ -168,12 +178,13 @@ void ThreadPlanPython::GetDescription(Stream *s, lldb::DescriptionLevel level) { if (m_implementation_sp) { ScriptInterpreter *script_interp = GetScriptInterpreter(); if (script_interp) { - bool script_error; - bool added_desc = script_interp->ScriptedThreadPlanGetStopDescription( - m_implementation_sp, s, script_error); - if 
(script_error || !added_desc) + auto desc_or_err = m_interface->GetStopDescription(s); + if (!desc_or_err || !*desc_or_err) { + LLDB_LOG_ERROR(GetLog(LLDBLog::Thread), desc_or_err.takeError(), + "Can't call ScriptedThreadPlan::GetStopDescription."); s->Printf("Python thread plan implemented by class %s.", m_class_name.c_str()); + } } return; } diff --git a/lldb/test/API/functionalities/step_scripted/Steps.py b/lldb/test/API/functionalities/step_scripted/Steps.py index 7527607be847a5..3325dba7536571 100644 --- a/lldb/test/API/functionalities/step_scripted/Steps.py +++ b/lldb/test/API/functionalities/step_scripted/Steps.py @@ -47,7 +47,7 @@ def queue_child_thread_plan(self): # This plan does a step-over until a variable changes value. class StepUntil(StepWithChild): - def __init__(self, thread_plan, args_data, dict): + def __init__(self, thread_plan, args_data): self.thread_plan = thread_plan self.frame = thread_plan.GetThread().frames[0] self.target = thread_plan.GetThread().GetProcess().GetTarget() @@ -99,7 +99,7 @@ def stop_description(self, stream): class StepReportsStopOthers: stop_mode_dict = {} - def __init__(self, thread_plan, args_data, dict): + def __init__(self, thread_plan, args_data): self.thread_plan = thread_plan self.key = str(args_data.GetValueForKey("token").GetUnsignedIntegerValue(1000)) diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp index 7f3359f6bf26b2..72dcf45a867e50 100644 --- a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp +++ b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp @@ -96,26 +96,6 @@ lldb_private::python::SWIGBridge::LLDBSwigPythonCreateCommandObject( return python::PythonObject(); } -python::PythonObject -lldb_private::python::SWIGBridge::LLDBSwigPythonCreateScriptedThreadPlan( - const char *python_class_name, const char *session_dictionary_name, - const StructuredDataImpl &args_data, std::string &error_string, - const lldb::ThreadPlanSP &thread_plan_sp) { - return python::PythonObject(); -} - -bool lldb_private::python::SWIGBridge::LLDBSWIGPythonCallThreadPlan( - void *implementor, const char *method_name, Event *event_sp, - bool &got_error) { - return false; -} - -bool lldb_private::python::SWIGBridge::LLDBSWIGPythonCallThreadPlan( - void *implementor, const char *method_name, Stream *event_sp, - bool &got_error) { - return false; -} - python::PythonObject lldb_private::python::SWIGBridge:: LLDBSwigPythonCreateScriptedBreakpointResolver( const char *python_class_name, const char *session_dictionary_name, @@ -170,6 +150,16 @@ lldb_private::python::LLDBSWIGPython_CastPyObjectToSBError(PyObject *data) { return nullptr; } +void * +lldb_private::python::LLDBSWIGPython_CastPyObjectToSBEvent(PyObject *data) { + return nullptr; +} + +void * +lldb_private::python::LLDBSWIGPython_CastPyObjectToSBStream(PyObject *data) { + return nullptr; +} + void * lldb_private::python::LLDBSWIGPython_CastPyObjectToSBValue(PyObject *data) { return nullptr; @@ -319,6 +309,11 @@ lldb_private::python::SWIGBridge::ToSWIGWrapper(lldb::ExecutionContextRefSP) { return python::PythonObject(); } +python::PythonObject +lldb_private::python::SWIGBridge::ToSWIGWrapper(lldb::ThreadPlanSP) { + return python::PythonObject(); +} + python::PythonObject lldb_private::python::SWIGBridge::ToSWIGWrapper(lldb::ProcessSP) { return python::PythonObject(); @@ -328,3 +323,13 @@ python::PythonObject lldb_private::python::SWIGBridge::ToSWIGWrapper( const lldb_private::StructuredDataImpl &) { 
 return python::PythonObject();
 }
+
+python::PythonObject
+lldb_private::python::SWIGBridge::ToSWIGWrapper(Event *event) {
+  return python::PythonObject();
+}
+
+python::PythonObject
+lldb_private::python::SWIGBridge::ToSWIGWrapper(const Stream *stream) {
+  return python::PythonObject();
+}

From 5908559c1091245989672ca486c6b3c0a8f60b5a Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Mon, 30 Oct 2023 17:03:04 -0700
Subject: [PATCH 100/144] [X86] Don't set SHF_X86_64_LARGE for variables with
 explicit section name of a well-known small data section prefix (#70748)

Commit f3ea73133f91c1c23596d45680c8f2269c1dd289 allows SHF_X86_64_LARGE
for all global variables with an explicit section. For the following
variables, their data sections will be annotated as SHF_X86_64_LARGE:
```
const char rodata[512] __attribute__((section(".rodata"))) = "a";
const char *const relro __attribute__((section(".data.rel.ro"))) = "a";
char data[512] __attribute__((section(".data"))) = "a";
```

The typical linker requirement is that we do not create more than one
output section with the same name, and that the single output section
carries the bitwise OR of all input section flags. Therefore, the
output .data section will have the SHF_X86_64_LARGE flag and be moved
away from the regular sections. This is undesired but benign. However,
.data.rel.ro having the SHF_X86_64_LARGE flag is problematic because
dynamic loaders do not support more than one PT_GNU_RELRO program
header, and LLD produces the error `error: section: .jcr is not
contiguous with other relro sections`.

I believe the most appropriate solution is to disallow SHF_X86_64_LARGE
on variables with an explicit section of certain well-known prefixes
(.bss/.data/.rodata) and allow others (e.g. metadata sections for
various instrumentation). Fortunately, global variables with an
explicit .bss/.data/.rodata section are rare, so they should not cause
excessive relocation overflow pressure.
---
 llvm/lib/Target/TargetMachine.cpp             | 14 ++++++++++++
 .../CodeGen/X86/code-model-elf-sections.ll    | 22 ++++++++++++++++++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index 45fb612cb91da1..9dc00ff85e009b 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -46,6 +46,20 @@ bool TargetMachine::isLargeData(const GlobalVariable *GV) const {
   // restrict this to medium.
   if (getCodeModel() != CodeModel::Medium)
     return false;
+
+  // Allowing large metadata sections in the presence of an explicit section is
+  // useful, even if GCC does not allow them. However, we should not mark
+  // certain well-known prefixes as large, because it would make the whole
+  // output section large and cause the linker to move it, which is almost
+  // always undesired.
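+  // Illustrative examples of the prefix rule below (a sketch based on the
+  // accompanying test expectations, not wording from the original commit):
+  // ".data", ".data.x", and ".bss.x" match a well-known prefix and keep their
+  // regular flags, while ".data0", "foo", or an instrumentation section such
+  // as "__llvm_prf_data" may still be marked SHF_X86_64_LARGE once the global
+  // exceeds the large-data threshold.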
+ StringRef Name = GV->getSection(); + auto IsPrefix = [&](StringRef Prefix) { + StringRef S = Name; + return S.consume_front(Prefix) && (S.empty() || S[0] == '.'); + }; + if (IsPrefix(".bss") || IsPrefix(".data") || IsPrefix(".rodata")) + return false; + const DataLayout &DL = GV->getParent()->getDataLayout(); uint64_t Size = DL.getTypeSizeInBits(GV->getValueType()) / 8; return Size == 0 || Size > LargeDataThreshold; diff --git a/llvm/test/CodeGen/X86/code-model-elf-sections.ll b/llvm/test/CodeGen/X86/code-model-elf-sections.ll index fe659fa9a46e72..5f579edc440d6b 100644 --- a/llvm/test/CodeGen/X86/code-model-elf-sections.ll +++ b/llvm/test/CodeGen/X86/code-model-elf-sections.ll @@ -17,6 +17,8 @@ ; RUN: llvm-readelf -S %t | FileCheck %s --check-prefix=SMALL-DS ; SMALL: .data {{.*}} WA {{.*}} +; SMALL: .data.x {{.*}} WA {{.*}} +; SMALL: .data0 {{.*}} WA {{.*}} ; SMALL: foo {{.*}} WA {{.*}} ; SMALL: .bss {{.*}} WA {{.*}} ; SMALL: .rodata {{.*}} A {{.*}} @@ -24,6 +26,9 @@ ; SMALL: .tbss {{.*}} WAT {{.*}} ; SMALL: .tdata {{.*}} WAT {{.*}} +; SMALL-DS: .data {{.*}} WA {{.*}} +; SMALL-DS: .data.x {{.*}} WA {{.*}} +; SMALL-DS: .data0 {{.*}} WA {{.*}} ; SMALL-DS: .data.data {{.*}} WA {{.*}} ; SMALL-DS: foo {{.*}} WA {{.*}} ; SMALL-DS: .bss.bss {{.*}} WA {{.*}} @@ -32,17 +37,27 @@ ; SMALL-DS: .tbss.tbss {{.*}} WAT {{.*}} ; SMALL-DS: .tdata.tdata {{.*}} WAT {{.*}} +; LARGE: .data {{.*}} WA {{.*}} +; LARGE: .data.x {{.*}} WA {{.*}} +; LARGE: .data0 {{.*}} WAl {{.*}} ; LARGE: .ldata {{.*}} WAl {{.*}} ; LARGE: foo {{.*}} WAl {{.*}} +; LARGE: .bss {{.*}} WA {{.*}} ; LARGE: .lbss {{.*}} WAl {{.*}} +; LARGE: .rodata {{.*}} A {{.*}} ; LARGE: .lrodata {{.*}} Al {{.*}} ; LARGE: .ldata.rel.ro {{.*}} WAl {{.*}} ; LARGE: .tbss {{.*}} WAT {{.*}} ; LARGE: .tdata {{.*}} WAT {{.*}} +; LARGE-DS: .data {{.*}} WA {{.*}} +; LARGE-DS: .data.x {{.*}} WA {{.*}} +; LARGE-DS: .data0 {{.*}} WAl {{.*}} ; LARGE-DS: .ldata.data {{.*}} WAl {{.*}} ; LARGE-DS: foo {{.*}} WAl {{.*}} +; LARGE-DS: .bss {{.*}} WA {{.*}} ; LARGE-DS: .lbss.bss {{.*}} WAl {{.*}} +; LARGE-DS: .rodata {{.*}} A {{.*}} ; LARGE-DS: .lrodata.rodata {{.*}} Al {{.*}} ; LARGE-DS: .ldata.rel.ro.relro {{.*}} WAl {{.*}} ; LARGE-DS: .tbss.tbss {{.*}} WAT {{.*}} @@ -51,9 +66,14 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64--linux" +@data_with_explicit_section = internal global [10 x i64] [i64 1, i64 2, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0], section ".data" +@data_with_explicit_section2 = internal global [10 x i64] [i64 1, i64 2, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0], section ".data.x" +@data_with_explicit_section0 = internal global [10 x i64] [i64 1, i64 2, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0], section ".data0" @data = internal global [10 x i64] [i64 1, i64 2, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0] -@data_with_explicit_section = internal global [10 x i64] [i64 1, i64 2, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0], section "foo" +@foo_with_explicit_section = internal global [10 x i64] [i64 1, i64 2, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0], section "foo" +@bss_with_explicit_section = internal global [10 x i64] zeroinitializer, section ".bss" @bss = internal global [10 x i64] zeroinitializer +@rodata_with_explicit_section = internal constant [10 x i64] zeroinitializer, section ".rodata" @rodata = internal constant [10 x i64] zeroinitializer @relro = internal constant [10 x ptr] [ptr @func, ptr @func, ptr @func, ptr @func, ptr 
@func, ptr @func, ptr @func, ptr @func, ptr @func, ptr @func] @tbss = internal thread_local global [10 x i64] zeroinitializer From e137af60cd012243b33843d752638d8663347e4d Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 30 Oct 2023 16:32:05 -0700 Subject: [PATCH 101/144] [OpenMP][NFC] Fix test to actually check for the result --- openmp/libomptarget/test/offloading/malloc_parallel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openmp/libomptarget/test/offloading/malloc_parallel.c b/openmp/libomptarget/test/offloading/malloc_parallel.c index b8e975ca55a8ff..4908e00694d99e 100644 --- a/openmp/libomptarget/test/offloading/malloc_parallel.c +++ b/openmp/libomptarget/test/offloading/malloc_parallel.c @@ -1,5 +1,5 @@ -// RUN: %libomptarget-compile-generic && %libomptarget-run-generic -// RUN: %libomptarget-compileopt-generic && %libomptarget-run-generic +// RUN: %libomptarget-compile-run-and-check-generic +// RUN: %libomptarget-compileopt-run-and-check-generic #include #include From ed5faa475b401aa91284d19afb52b2927aa59c97 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Mon, 30 Oct 2023 17:24:27 -0700 Subject: [PATCH 102/144] [lldb] Fix build failure introduced in 484038416d06 (NFC) Signed-off-by: Med Ismail Bennani --- .../ScriptedProcessPythonInterface.cpp | 27 ++++++++++++------- .../ScriptedThreadPlanPythonInterface.cpp | 12 ++++++--- .../ScriptedThreadPythonInterface.cpp | 27 ++++++++++++------- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp index e86b34d6b930e4..313c597ce48f3c 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp @@ -49,7 +49,8 @@ StructuredData::DictionarySP ScriptedProcessPythonInterface::GetCapabilities() { StructuredData::DictionarySP dict = Dispatch("get_capabilities", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, + error)) return {}; return dict; @@ -90,7 +91,8 @@ StructuredData::DictionarySP ScriptedProcessPythonInterface::GetThreadsInfo() { StructuredData::DictionarySP dict = Dispatch("get_threads_info", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, + error)) return {}; return dict; @@ -106,7 +108,8 @@ bool ScriptedProcessPythonInterface::CreateBreakpoint(lldb::addr_t addr, if (py_error.Fail()) error = py_error; - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return {}; return obj->GetBooleanValue(); @@ -131,7 +134,8 @@ lldb::offset_t ScriptedProcessPythonInterface::WriteMemoryAtAddress( StructuredData::ObjectSP obj = Dispatch("write_memory_at_address", py_error, addr, data_sp, error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return LLDB_INVALID_OFFSET; // If there was an error on the python call, surface it to the user. 
@@ -146,7 +150,8 @@ StructuredData::ArraySP ScriptedProcessPythonInterface::GetLoadedImages() { StructuredData::ArraySP array = Dispatch("get_loaded_images", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, array, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, array, + error)) return {}; return array; @@ -156,7 +161,8 @@ lldb::pid_t ScriptedProcessPythonInterface::GetProcessID() { Status error; StructuredData::ObjectSP obj = Dispatch("get_process_id", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return LLDB_INVALID_PROCESS_ID; return obj->GetUnsignedIntegerValue(LLDB_INVALID_PROCESS_ID); @@ -166,7 +172,8 @@ bool ScriptedProcessPythonInterface::IsAlive() { Status error; StructuredData::ObjectSP obj = Dispatch("is_alive", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return {}; return obj->GetBooleanValue(); @@ -177,7 +184,8 @@ ScriptedProcessPythonInterface::GetScriptedThreadPluginName() { Status error; StructuredData::ObjectSP obj = Dispatch("get_scripted_thread_plugin", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return {}; return obj->GetStringValue().str(); @@ -193,7 +201,8 @@ StructuredData::DictionarySP ScriptedProcessPythonInterface::GetMetadata() { StructuredData::DictionarySP dict = Dispatch("get_process_metadata", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, + error)) return {}; return dict; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp index df9f7db6f62b02..0a1bcb5b92c731 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp @@ -40,7 +40,8 @@ ScriptedThreadPlanPythonInterface::ExplainsStop(Event *event) { Status error; StructuredData::ObjectSP obj = Dispatch("explains_stop", error, event); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return error.ToError(); return obj->GetBooleanValue(); @@ -51,7 +52,8 @@ ScriptedThreadPlanPythonInterface::ShouldStop(Event *event) { Status error; StructuredData::ObjectSP obj = Dispatch("should_stop", error, event); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return error.ToError(); return obj->GetBooleanValue(); @@ -61,7 +63,8 @@ llvm::Expected ScriptedThreadPlanPythonInterface::IsStale() { Status error; StructuredData::ObjectSP obj = Dispatch("is_stale", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return error.ToError(); return obj->GetBooleanValue(); @@ -71,7 +74,8 @@ lldb::StateType ScriptedThreadPlanPythonInterface::GetRunState() { Status error; StructuredData::ObjectSP obj = 
Dispatch("should_step", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return lldb::eStateStepping; return static_cast(obj->GetUnsignedIntegerValue( diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp index ba2ec0e78e9f53..8af89d761764bc 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp @@ -45,7 +45,8 @@ lldb::tid_t ScriptedThreadPythonInterface::GetThreadID() { Status error; StructuredData::ObjectSP obj = Dispatch("get_thread_id", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return LLDB_INVALID_THREAD_ID; return obj->GetUnsignedIntegerValue(LLDB_INVALID_THREAD_ID); @@ -55,7 +56,8 @@ std::optional ScriptedThreadPythonInterface::GetName() { Status error; StructuredData::ObjectSP obj = Dispatch("get_name", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return {}; return obj->GetStringValue().str(); @@ -65,7 +67,8 @@ lldb::StateType ScriptedThreadPythonInterface::GetState() { Status error; StructuredData::ObjectSP obj = Dispatch("get_state", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return eStateInvalid; return static_cast(obj->GetUnsignedIntegerValue(eStateInvalid)); @@ -75,7 +78,8 @@ std::optional ScriptedThreadPythonInterface::GetQueue() { Status error; StructuredData::ObjectSP obj = Dispatch("get_queue", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return {}; return obj->GetStringValue().str(); @@ -86,7 +90,8 @@ StructuredData::DictionarySP ScriptedThreadPythonInterface::GetStopReason() { StructuredData::DictionarySP dict = Dispatch("get_stop_reason", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, + error)) return {}; return dict; @@ -97,7 +102,8 @@ StructuredData::ArraySP ScriptedThreadPythonInterface::GetStackFrames() { StructuredData::ArraySP arr = Dispatch("get_stackframes", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, arr, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, arr, + error)) return {}; return arr; @@ -108,7 +114,8 @@ StructuredData::DictionarySP ScriptedThreadPythonInterface::GetRegisterInfo() { StructuredData::DictionarySP dict = Dispatch("get_register_info", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, + error)) return {}; return dict; @@ -118,7 +125,8 @@ std::optional ScriptedThreadPythonInterface::GetRegisterContext() { Status error; StructuredData::ObjectSP obj = Dispatch("get_register_context", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) + if 
(!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, + error)) return {}; return obj->GetAsString()->GetValue().str(); @@ -129,7 +137,8 @@ StructuredData::ArraySP ScriptedThreadPythonInterface::GetExtendedInfo() { StructuredData::ArraySP arr = Dispatch("get_extended_info", error); - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, arr, error)) + if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, arr, + error)) return {}; return arr; From 2b7ba0155dc06de1f14a2a085f423570c1c896d0 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Mon, 30 Oct 2023 17:38:36 -0700 Subject: [PATCH 103/144] Revert "[lldb] Fix build failure introduced in 484038416d06 (NFC)" This reverts commit ed5faa475b401aa91284d19afb52b2927aa59c97 since it introduces test failures: https://lab.llvm.org/buildbot/#/builders/68/builds/62556 --- .../ScriptedProcessPythonInterface.cpp | 27 +++++++------------ .../ScriptedThreadPlanPythonInterface.cpp | 12 +++------ .../ScriptedThreadPythonInterface.cpp | 27 +++++++------------ 3 files changed, 22 insertions(+), 44 deletions(-) diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp index 313c597ce48f3c..e86b34d6b930e4 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedProcessPythonInterface.cpp @@ -49,8 +49,7 @@ StructuredData::DictionarySP ScriptedProcessPythonInterface::GetCapabilities() { StructuredData::DictionarySP dict = Dispatch("get_capabilities", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error)) return {}; return dict; @@ -91,8 +90,7 @@ StructuredData::DictionarySP ScriptedProcessPythonInterface::GetThreadsInfo() { StructuredData::DictionarySP dict = Dispatch("get_threads_info", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error)) return {}; return dict; @@ -108,8 +106,7 @@ bool ScriptedProcessPythonInterface::CreateBreakpoint(lldb::addr_t addr, if (py_error.Fail()) error = py_error; - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return {}; return obj->GetBooleanValue(); @@ -134,8 +131,7 @@ lldb::offset_t ScriptedProcessPythonInterface::WriteMemoryAtAddress( StructuredData::ObjectSP obj = Dispatch("write_memory_at_address", py_error, addr, data_sp, error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return LLDB_INVALID_OFFSET; // If there was an error on the python call, surface it to the user. 
@@ -150,8 +146,7 @@ StructuredData::ArraySP ScriptedProcessPythonInterface::GetLoadedImages() { StructuredData::ArraySP array = Dispatch("get_loaded_images", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, array, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, array, error)) return {}; return array; @@ -161,8 +156,7 @@ lldb::pid_t ScriptedProcessPythonInterface::GetProcessID() { Status error; StructuredData::ObjectSP obj = Dispatch("get_process_id", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return LLDB_INVALID_PROCESS_ID; return obj->GetUnsignedIntegerValue(LLDB_INVALID_PROCESS_ID); @@ -172,8 +166,7 @@ bool ScriptedProcessPythonInterface::IsAlive() { Status error; StructuredData::ObjectSP obj = Dispatch("is_alive", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return {}; return obj->GetBooleanValue(); @@ -184,8 +177,7 @@ ScriptedProcessPythonInterface::GetScriptedThreadPluginName() { Status error; StructuredData::ObjectSP obj = Dispatch("get_scripted_thread_plugin", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return {}; return obj->GetStringValue().str(); @@ -201,8 +193,7 @@ StructuredData::DictionarySP ScriptedProcessPythonInterface::GetMetadata() { StructuredData::DictionarySP dict = Dispatch("get_process_metadata", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error)) return {}; return dict; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp index 0a1bcb5b92c731..df9f7db6f62b02 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp @@ -40,8 +40,7 @@ ScriptedThreadPlanPythonInterface::ExplainsStop(Event *event) { Status error; StructuredData::ObjectSP obj = Dispatch("explains_stop", error, event); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return error.ToError(); return obj->GetBooleanValue(); @@ -52,8 +51,7 @@ ScriptedThreadPlanPythonInterface::ShouldStop(Event *event) { Status error; StructuredData::ObjectSP obj = Dispatch("should_stop", error, event); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return error.ToError(); return obj->GetBooleanValue(); @@ -63,8 +61,7 @@ llvm::Expected ScriptedThreadPlanPythonInterface::IsStale() { Status error; StructuredData::ObjectSP obj = Dispatch("is_stale", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return error.ToError(); return obj->GetBooleanValue(); @@ -74,8 +71,7 @@ lldb::StateType ScriptedThreadPlanPythonInterface::GetRunState() { Status error; StructuredData::ObjectSP obj = 
Dispatch("should_step", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return lldb::eStateStepping; return static_cast(obj->GetUnsignedIntegerValue( diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp index 8af89d761764bc..ba2ec0e78e9f53 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp @@ -45,8 +45,7 @@ lldb::tid_t ScriptedThreadPythonInterface::GetThreadID() { Status error; StructuredData::ObjectSP obj = Dispatch("get_thread_id", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return LLDB_INVALID_THREAD_ID; return obj->GetUnsignedIntegerValue(LLDB_INVALID_THREAD_ID); @@ -56,8 +55,7 @@ std::optional ScriptedThreadPythonInterface::GetName() { Status error; StructuredData::ObjectSP obj = Dispatch("get_name", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return {}; return obj->GetStringValue().str(); @@ -67,8 +65,7 @@ lldb::StateType ScriptedThreadPythonInterface::GetState() { Status error; StructuredData::ObjectSP obj = Dispatch("get_state", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return eStateInvalid; return static_cast(obj->GetUnsignedIntegerValue(eStateInvalid)); @@ -78,8 +75,7 @@ std::optional ScriptedThreadPythonInterface::GetQueue() { Status error; StructuredData::ObjectSP obj = Dispatch("get_queue", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return {}; return obj->GetStringValue().str(); @@ -90,8 +86,7 @@ StructuredData::DictionarySP ScriptedThreadPythonInterface::GetStopReason() { StructuredData::DictionarySP dict = Dispatch("get_stop_reason", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error)) return {}; return dict; @@ -102,8 +97,7 @@ StructuredData::ArraySP ScriptedThreadPythonInterface::GetStackFrames() { StructuredData::ArraySP arr = Dispatch("get_stackframes", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, arr, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, arr, error)) return {}; return arr; @@ -114,8 +108,7 @@ StructuredData::DictionarySP ScriptedThreadPythonInterface::GetRegisterInfo() { StructuredData::DictionarySP dict = Dispatch("get_register_info", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error)) return {}; return dict; @@ -125,8 +118,7 @@ std::optional ScriptedThreadPythonInterface::GetRegisterContext() { Status error; StructuredData::ObjectSP obj = Dispatch("get_register_context", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, - error)) + if 
(!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) return {}; return obj->GetAsString()->GetValue().str(); @@ -137,8 +129,7 @@ StructuredData::ArraySP ScriptedThreadPythonInterface::GetExtendedInfo() { StructuredData::ArraySP arr = Dispatch("get_extended_info", error); - if (!ScriptedInterface::CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, arr, - error)) + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, arr, error)) return {}; return arr; From 6eafe2cb7a3286c1b13eea7d8370374553fe81a9 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Mon, 30 Oct 2023 17:40:11 -0700 Subject: [PATCH 104/144] Revert "[lldb] Make use of Scripted{Python,}Interface for ScriptedThreadPlan (#70392)" This reverts commit 4b3cd379cce3f455bf3c8677ca7a5be6e708a4ce since it introduces some test failures: https://lab.llvm.org/buildbot/#/builders/68/builds/62556 --- lldb/bindings/python/python-swigsafecast.swig | 13 +- lldb/bindings/python/python-wrapper.swig | 153 +++++++++++++++--- lldb/include/lldb/API/SBEvent.h | 4 +- lldb/include/lldb/API/SBStream.h | 9 -- .../Interfaces/ScriptedInterface.h | 4 +- .../Interfaces/ScriptedThreadPlanInterface.h | 40 ----- .../lldb/Interpreter/ScriptInterpreter.h | 56 +++++-- lldb/include/lldb/Target/ThreadPlanPython.h | 2 - lldb/include/lldb/lldb-forward.h | 3 - lldb/source/Interpreter/ScriptInterpreter.cpp | 13 -- .../Python/Interfaces/CMakeLists.txt | 1 - .../ScriptedPlatformPythonInterface.cpp | 2 - .../Interfaces/ScriptedPythonInterface.cpp | 34 +--- .../Interfaces/ScriptedPythonInterface.h | 20 --- .../ScriptedThreadPlanPythonInterface.cpp | 92 ----------- .../ScriptedThreadPlanPythonInterface.h | 44 ----- .../ScriptedThreadPythonInterface.cpp | 1 - .../Python/SWIGPythonBridge.h | 21 ++- .../Python/ScriptInterpreterPython.cpp | 122 +++++++++++++- .../Python/ScriptInterpreterPythonImpl.h | 28 +++- lldb/source/Target/ThreadPlanPython.cpp | 97 +++++------ .../functionalities/step_scripted/Steps.py | 4 +- .../Python/PythonTestSuite.cpp | 45 +++--- 23 files changed, 407 insertions(+), 401 deletions(-) delete mode 100644 lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h delete mode 100644 lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp delete mode 100644 lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.h diff --git a/lldb/bindings/python/python-swigsafecast.swig b/lldb/bindings/python/python-swigsafecast.swig index fba3a77d8f2df4..d5ea5148727134 100644 --- a/lldb/bindings/python/python-swigsafecast.swig +++ b/lldb/bindings/python/python-swigsafecast.swig @@ -37,6 +37,10 @@ PythonObject SWIGBridge::ToSWIGWrapper(const Status& status) { return ToSWIGHelper(new lldb::SBError(status), SWIGTYPE_p_lldb__SBError); } +PythonObject SWIGBridge::ToSWIGWrapper(std::unique_ptr stream_sb) { + return ToSWIGHelper(stream_sb.release(), SWIGTYPE_p_lldb__SBStream); +} + PythonObject SWIGBridge::ToSWIGWrapper(std::unique_ptr data_sb) { return ToSWIGHelper(data_sb.release(), SWIGTYPE_p_lldb__SBStructuredData); } @@ -111,12 +115,9 @@ SWIGBridge::ToSWIGWrapper(CommandReturnObject &cmd_retobj) { SWIGTYPE_p_lldb__SBCommandReturnObject); } -PythonObject SWIGBridge::ToSWIGWrapper(const Stream *s) { - return ToSWIGHelper(new lldb::SBStream(), SWIGTYPE_p_lldb__SBStream); -} - -PythonObject SWIGBridge::ToSWIGWrapper(Event *event) { - return ToSWIGHelper(new lldb::SBEvent(event), SWIGTYPE_p_lldb__SBEvent); +ScopedPythonObject SWIGBridge::ToSWIGWrapper(Event *event) { + return 
ScopedPythonObject(new lldb::SBEvent(event), + SWIGTYPE_p_lldb__SBEvent); } PythonObject SWIGBridge::ToSWIGWrapper( diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index 5c28d652824073..17bc7b1f219870 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -229,6 +229,133 @@ PythonObject lldb_private::python::SWIGBridge::LLDBSwigPythonCreateCommandObject return pfunc(SWIGBridge::ToSWIGWrapper(std::move(debugger_sp)), dict); } +PythonObject lldb_private::python::SWIGBridge::LLDBSwigPythonCreateScriptedThreadPlan( + const char *python_class_name, const char *session_dictionary_name, + const lldb_private::StructuredDataImpl &args_impl, + std::string &error_string, const lldb::ThreadPlanSP &thread_plan_sp) { + if (python_class_name == NULL || python_class_name[0] == '\0' || + !session_dictionary_name) + return PythonObject(); + + PyErr_Cleaner py_err_cleaner(true); + + auto dict = PythonModule::MainModule().ResolveName( + session_dictionary_name); + auto pfunc = PythonObject::ResolveNameWithDictionary( + python_class_name, dict); + + if (!pfunc.IsAllocated()) { + error_string.append("could not find script class: "); + error_string.append(python_class_name); + return PythonObject(); + } + + PythonObject tp_arg = SWIGBridge::ToSWIGWrapper(thread_plan_sp); + + llvm::Expected arg_info = pfunc.GetArgInfo(); + if (!arg_info) { + llvm::handleAllErrors( + arg_info.takeError(), + [&](PythonException &E) { error_string.append(E.ReadBacktrace()); }, + [&](const llvm::ErrorInfoBase &E) { + error_string.append(E.message()); + }); + return PythonObject(); + } + + PythonObject result = {}; + auto args_sb = std::unique_ptr(new lldb::SBStructuredData(args_impl)); + if (arg_info.get().max_positional_args == 2) { + if (args_sb->IsValid()) { + error_string.assign( + "args passed, but __init__ does not take an args dictionary"); + return PythonObject(); + } + result = pfunc(tp_arg, dict); + } else if (arg_info.get().max_positional_args >= 3) { + result = pfunc(tp_arg, SWIGBridge::ToSWIGWrapper(std::move(args_sb)), dict); + } else { + error_string.assign("wrong number of arguments in __init__, should be 2 or " + "3 (not including self)"); + return PythonObject(); + } + + // FIXME: At this point we should check that the class we found supports all + // the methods that we need. + + return result; +} + +bool lldb_private::python::SWIGBridge::LLDBSWIGPythonCallThreadPlan( + void *implementor, const char *method_name, lldb_private::Event *event, + bool &got_error) { + got_error = false; + + PyErr_Cleaner py_err_cleaner(false); + PythonObject self(PyRefType::Borrowed, static_cast(implementor)); + auto pfunc = self.ResolveName(method_name); + + if (!pfunc.IsAllocated()) + return false; + + PythonObject result; + if (event != nullptr) { + ScopedPythonObject event_arg = SWIGBridge::ToSWIGWrapper(event); + result = pfunc(event_arg.obj()); + } else + result = pfunc(); + + if (PyErr_Occurred()) { + got_error = true; + printf("Return value was neither false nor true for call to %s.\n", + method_name); + PyErr_Print(); + return false; + } + + if (result.get() == Py_True) + return true; + else if (result.get() == Py_False) + return false; + + // Somebody returned the wrong thing... 
+ got_error = true; + printf("Wrong return value type for call to %s.\n", method_name); + return false; +} + +bool lldb_private::python::SWIGBridge::LLDBSWIGPythonCallThreadPlan( + void *implementor, const char *method_name, lldb_private::Stream *stream, + bool &got_error) { + got_error = false; + + PyErr_Cleaner py_err_cleaner(false); + PythonObject self(PyRefType::Borrowed, static_cast(implementor)); + auto pfunc = self.ResolveName(method_name); + + if (!pfunc.IsAllocated()) + return false; + + auto *sb_stream = new lldb::SBStream(); + PythonObject sb_stream_arg = + SWIGBridge::ToSWIGWrapper(std::unique_ptr(sb_stream)); + + PythonObject result; + result = pfunc(sb_stream_arg); + + if (PyErr_Occurred()) { + printf("Error occured for call to %s.\n", + method_name); + PyErr_Print(); + got_error = true; + return false; + } + if (stream) + stream->PutCString(sb_stream->GetData()); + return true; + +} + PythonObject lldb_private::python::SWIGBridge::LLDBSwigPythonCreateScriptedBreakpointResolver( const char *python_class_name, const char *session_dictionary_name, const StructuredDataImpl &args_impl, @@ -375,7 +502,7 @@ bool lldb_private::python::SWIGBridge::LLDBSwigPythonStopHookCallHandleStop( auto *sb_stream = new lldb::SBStream(); PythonObject sb_stream_arg = - SWIGBridge::ToSWIGWrapper(stream.get()); + SWIGBridge::ToSWIGWrapper(std::unique_ptr(sb_stream)); PythonObject result = pfunc(SWIGBridge::ToSWIGWrapper(std::move(exc_ctx_sp)), sb_stream_arg); @@ -626,30 +753,6 @@ void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBError(PyObject * data return sb_ptr; } -void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBEvent(PyObject * data) { - lldb::SBEvent *sb_ptr = nullptr; - - int valid_cast = - SWIG_ConvertPtr(data, (void **)&sb_ptr, SWIGTYPE_p_lldb__SBEvent, 0); - - if (valid_cast == -1) - return NULL; - - return sb_ptr; -} - -void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBStream(PyObject * data) { - lldb::SBStream *sb_ptr = nullptr; - - int valid_cast = - SWIG_ConvertPtr(data, (void **)&sb_ptr, SWIGTYPE_p_lldb__SBStream, 0); - - if (valid_cast == -1) - return NULL; - - return sb_ptr; -} - void *lldb_private::python::LLDBSWIGPython_CastPyObjectToSBValue(PyObject * data) { lldb::SBValue *sb_ptr = NULL; diff --git a/lldb/include/lldb/API/SBEvent.h b/lldb/include/lldb/API/SBEvent.h index 85b401ca8cc100..cc116766e85f4a 100644 --- a/lldb/include/lldb/API/SBEvent.h +++ b/lldb/include/lldb/API/SBEvent.h @@ -15,7 +15,6 @@ #include namespace lldb_private { -class ScriptInterpreter; namespace python { class SWIGBridge; } @@ -74,12 +73,11 @@ class LLDB_API SBEvent { friend class SBThread; friend class SBWatchpoint; - friend class lldb_private::ScriptInterpreter; friend class lldb_private::python::SWIGBridge; SBEvent(lldb::EventSP &event_sp); - SBEvent(lldb_private::Event *event); + SBEvent(lldb_private::Event *event_sp); lldb::EventSP &GetSP() const; diff --git a/lldb/include/lldb/API/SBStream.h b/lldb/include/lldb/API/SBStream.h index ee329737d594b5..0e33f05b69916f 100644 --- a/lldb/include/lldb/API/SBStream.h +++ b/lldb/include/lldb/API/SBStream.h @@ -13,13 +13,6 @@ #include "lldb/API/SBDefines.h" -namespace lldb_private { -class ScriptInterpreter; -namespace python { -class SWIGBridge; -} -} // namespace lldb_private - namespace lldb { class LLDB_API SBStream { @@ -108,8 +101,6 @@ class LLDB_API SBStream { friend class SBValue; friend class SBWatchpoint; - friend class lldb_private::ScriptInterpreter; - lldb_private::Stream *operator->(); lldb_private::Stream *get(); diff 
--git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h index fc0e488da69829..e4816352daa5db 100644 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h +++ b/lldb/include/lldb/Interpreter/Interfaces/ScriptedInterface.h @@ -10,6 +10,7 @@ #define LLDB_INTERPRETER_INTERFACES_SCRIPTEDINTERFACE_H #include "lldb/Core/StructuredDataImpl.h" +#include "lldb/Target/ExecutionContext.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/UnimplementedError.h" @@ -49,8 +50,7 @@ class ScriptedInterface { } template - static bool CheckStructuredDataObject(llvm::StringRef caller, T obj, - Status &error) { + bool CheckStructuredDataObject(llvm::StringRef caller, T obj, Status &error) { if (!obj) return ErrorWithMessage(caller, "Null Structured Data object", error); diff --git a/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h b/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h deleted file mode 100644 index 4dadda4d978985..00000000000000 --- a/lldb/include/lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h +++ /dev/null @@ -1,40 +0,0 @@ -//===-- ScriptedThreadPlanInterface.h ---------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLDB_INTERPRETER_INTERFACES_SCRIPTEDTHREADPLANINTERFACE_H -#define LLDB_INTERPRETER_INTERFACES_SCRIPTEDTHREADPLANINTERFACE_H - -#include "lldb/lldb-private.h" - -#include "ScriptedInterface.h" - -namespace lldb_private { -class ScriptedThreadPlanInterface : public ScriptedInterface { -public: - virtual llvm::Expected - CreatePluginObject(llvm::StringRef class_name, - lldb::ThreadPlanSP thread_plan_sp, - const StructuredDataImpl &args_sp) { - llvm_unreachable("unimplemented!"); - } - - virtual llvm::Expected ExplainsStop(Event *event) { return true; } - - virtual llvm::Expected ShouldStop(Event *event) { return true; } - - virtual llvm::Expected IsStale() { return true; }; - - virtual lldb::StateType GetRunState() { return lldb::eStateStepping; } - - virtual llvm::Expected GetStopDescription(lldb_private::Stream *s) { - return true; - } -}; -} // namespace lldb_private - -#endif // LLDB_INTERPRETER_INTERFACES_SCRIPTEDTHREADPLANINTERFACE_H diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h index 7e2a7286e20422..0146eeb8626200 100644 --- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h +++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h @@ -13,10 +13,8 @@ #include "lldb/API/SBBreakpoint.h" #include "lldb/API/SBData.h" #include "lldb/API/SBError.h" -#include "lldb/API/SBEvent.h" #include "lldb/API/SBLaunchInfo.h" #include "lldb/API/SBMemoryRegionInfo.h" -#include "lldb/API/SBStream.h" #include "lldb/Breakpoint/BreakpointOptions.h" #include "lldb/Core/PluginInterface.h" #include "lldb/Core/SearchFilter.h" @@ -27,7 +25,6 @@ #include "lldb/Interpreter/Interfaces/ScriptedPlatformInterface.h" #include "lldb/Interpreter/Interfaces/ScriptedProcessInterface.h" #include "lldb/Interpreter/Interfaces/ScriptedThreadInterface.h" -#include "lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h" #include "lldb/Interpreter/ScriptObject.h" #include 
"lldb/Utility/Broadcaster.h" #include "lldb/Utility/Status.h" @@ -256,6 +253,50 @@ class ScriptInterpreter : public PluginInterface { return lldb::ValueObjectListSP(); } + virtual StructuredData::ObjectSP + CreateScriptedThreadPlan(const char *class_name, + const StructuredDataImpl &args_data, + std::string &error_str, + lldb::ThreadPlanSP thread_plan_sp) { + return StructuredData::ObjectSP(); + } + + virtual bool + ScriptedThreadPlanExplainsStop(StructuredData::ObjectSP implementor_sp, + Event *event, bool &script_error) { + script_error = true; + return true; + } + + virtual bool + ScriptedThreadPlanShouldStop(StructuredData::ObjectSP implementor_sp, + Event *event, bool &script_error) { + script_error = true; + return true; + } + + virtual bool + ScriptedThreadPlanIsStale(StructuredData::ObjectSP implementor_sp, + bool &script_error) { + script_error = true; + return true; + } + + virtual lldb::StateType + ScriptedThreadPlanGetRunState(StructuredData::ObjectSP implementor_sp, + bool &script_error) { + script_error = true; + return lldb::eStateStepping; + } + + virtual bool + ScriptedThreadPlanGetStopDescription(StructuredData::ObjectSP implementor_sp, + lldb_private::Stream *stream, + bool &script_error) { + script_error = true; + return false; + } + virtual StructuredData::GenericSP CreateScriptedBreakpointResolver(const char *class_name, const StructuredDataImpl &args_data, @@ -525,11 +566,6 @@ class ScriptInterpreter : public PluginInterface { return std::make_shared(); } - virtual lldb::ScriptedThreadPlanInterfaceSP - CreateScriptedThreadPlanInterface() { - return std::make_shared(); - } - virtual lldb::OperatingSystemInterfaceSP CreateOperatingSystemInterface() { return std::make_shared(); } @@ -548,10 +584,6 @@ class ScriptInterpreter : public PluginInterface { Status GetStatusFromSBError(const lldb::SBError &error) const; - Event *GetOpaqueTypeFromSBEvent(const lldb::SBEvent &event) const; - - Stream *GetOpaqueTypeFromSBStream(const lldb::SBStream &stream) const; - lldb::BreakpointSP GetOpaqueTypeFromSBBreakpoint(const lldb::SBBreakpoint &breakpoint) const; diff --git a/lldb/include/lldb/Target/ThreadPlanPython.h b/lldb/include/lldb/Target/ThreadPlanPython.h index da106faf951db1..64854d66b8f258 100644 --- a/lldb/include/lldb/Target/ThreadPlanPython.h +++ b/lldb/include/lldb/Target/ThreadPlanPython.h @@ -13,7 +13,6 @@ #include #include "lldb/Core/StructuredDataImpl.h" -#include "lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h" #include "lldb/Target/Process.h" #include "lldb/Target/StopInfo.h" #include "lldb/Target/Target.h" @@ -71,7 +70,6 @@ class ThreadPlanPython : public ThreadPlan { StreamString m_stop_description; // Cache the stop description here bool m_did_push; bool m_stop_others; - lldb::ScriptedThreadPlanInterfaceSP m_interface; ThreadPlanPython(const ThreadPlanPython &) = delete; const ThreadPlanPython &operator=(const ThreadPlanPython &) = delete; diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index 6138e6fe5a60b4..aa099d4abc3b09 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -185,7 +185,6 @@ class ScriptedMetadata; class ScriptedPlatformInterface; class ScriptedProcessInterface; class ScriptedThreadInterface; -class ScriptedThreadPlanInterface; class ScriptedSyntheticChildren; class SearchFilter; class Section; @@ -394,8 +393,6 @@ typedef std::unique_ptr ScriptedProcessInterfaceUP; typedef std::shared_ptr ScriptedThreadInterfaceSP; -typedef std::shared_ptr - 
ScriptedThreadPlanInterfaceSP; typedef std::shared_ptr SectionSP; typedef std::unique_ptr SectionListUP; typedef std::weak_ptr SectionWP; diff --git a/lldb/source/Interpreter/ScriptInterpreter.cpp b/lldb/source/Interpreter/ScriptInterpreter.cpp index aee2ec94d7979a..fb3fa74d0b9780 100644 --- a/lldb/source/Interpreter/ScriptInterpreter.cpp +++ b/lldb/source/Interpreter/ScriptInterpreter.cpp @@ -104,19 +104,6 @@ ScriptInterpreter::GetStatusFromSBError(const lldb::SBError &error) const { return Status(); } -Event * -ScriptInterpreter::GetOpaqueTypeFromSBEvent(const lldb::SBEvent &event) const { - return event.m_opaque_ptr; -} - -Stream *ScriptInterpreter::GetOpaqueTypeFromSBStream( - const lldb::SBStream &stream) const { - if (stream.m_opaque_up) - return const_cast(stream).m_opaque_up.get(); - - return nullptr; -} - std::optional ScriptInterpreter::GetOpaqueTypeFromSBMemoryRegionInfo( const lldb::SBMemoryRegionInfo &mem_region) const { diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt index c60e4bb503a371..b22abc49c92a9a 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/CMakeLists.txt @@ -24,7 +24,6 @@ add_lldb_library(lldbPluginScriptInterpreterPythonInterfaces ScriptedPythonInterface.cpp ScriptedProcessPythonInterface.cpp ScriptedThreadPythonInterface.cpp - ScriptedThreadPlanPythonInterface.cpp ScriptedPlatformPythonInterface.cpp LINK_LIBS diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp index 6e93bec80056ee..9ba4731032bd35 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPlatformPythonInterface.cpp @@ -20,8 +20,6 @@ #include "../ScriptInterpreterPythonImpl.h" #include "ScriptedPlatformPythonInterface.h" -#include "lldb/Target/ExecutionContext.h" - using namespace lldb; using namespace lldb_private; using namespace lldb_private::python; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp index 7d072212676e13..6f22503b279ca6 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.cpp @@ -26,15 +26,6 @@ ScriptedPythonInterface::ScriptedPythonInterface( ScriptInterpreterPythonImpl &interpreter) : ScriptedInterface(), m_interpreter(interpreter) {} -template <> -void ScriptedPythonInterface::ReverseTransform( - lldb_private::Stream *&original_arg, python::PythonObject transformed_arg, - Status &error) { - Stream *s = ExtractValueFromPythonObject(transformed_arg, error); - *original_arg = *s; - original_arg->PutCString(static_cast(s)->GetData()); -} - template <> StructuredData::ArraySP ScriptedPythonInterface::ExtractValueFromPythonObject( @@ -57,33 +48,12 @@ Status ScriptedPythonInterface::ExtractValueFromPythonObject( if (lldb::SBError *sb_error = reinterpret_cast( python::LLDBSWIGPython_CastPyObjectToSBError(p.get()))) return m_interpreter.GetStatusFromSBError(*sb_error); - error.SetErrorString("Couldn't cast lldb::SBError to lldb::Status."); + else + 
error.SetErrorString("Couldn't cast lldb::SBError to lldb::Status."); return {}; } -template <> -Event *ScriptedPythonInterface::ExtractValueFromPythonObject( - python::PythonObject &p, Status &error) { - if (lldb::SBEvent *sb_event = reinterpret_cast( - python::LLDBSWIGPython_CastPyObjectToSBEvent(p.get()))) - return m_interpreter.GetOpaqueTypeFromSBEvent(*sb_event); - error.SetErrorString("Couldn't cast lldb::SBEvent to lldb_private::Event."); - - return nullptr; -} - -template <> -Stream *ScriptedPythonInterface::ExtractValueFromPythonObject( - python::PythonObject &p, Status &error) { - if (lldb::SBStream *sb_stream = reinterpret_cast( - python::LLDBSWIGPython_CastPyObjectToSBStream(p.get()))) - return m_interpreter.GetOpaqueTypeFromSBStream(*sb_stream); - error.SetErrorString("Couldn't cast lldb::SBStream to lldb_private::Stream."); - - return nullptr; -} - template <> lldb::DataExtractorSP ScriptedPythonInterface::ExtractValueFromPythonObject( diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h index cc760938c89959..7af98163970999 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h @@ -224,10 +224,6 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { return python::SWIGBridge::ToSWIGWrapper(arg); } - python::PythonObject Transform(lldb::ThreadPlanSP arg) { - return python::SWIGBridge::ToSWIGWrapper(arg); - } - python::PythonObject Transform(lldb::ProcessAttachInfoSP arg) { return python::SWIGBridge::ToSWIGWrapper(arg); } @@ -236,14 +232,6 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { return python::SWIGBridge::ToSWIGWrapper(arg); } - python::PythonObject Transform(Event *arg) { - return python::SWIGBridge::ToSWIGWrapper(arg); - } - - python::PythonObject Transform(Stream *arg) { - return python::SWIGBridge::ToSWIGWrapper(arg); - } - python::PythonObject Transform(lldb::DataExtractorSP arg) { return python::SWIGBridge::ToSWIGWrapper(arg); } @@ -341,14 +329,6 @@ template <> Status ScriptedPythonInterface::ExtractValueFromPythonObject( python::PythonObject &p, Status &error); -template <> -Event *ScriptedPythonInterface::ExtractValueFromPythonObject( - python::PythonObject &p, Status &error); - -template <> -Stream *ScriptedPythonInterface::ExtractValueFromPythonObject( - python::PythonObject &p, Status &error); - template <> lldb::BreakpointSP ScriptedPythonInterface::ExtractValueFromPythonObject( diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp deleted file mode 100644 index df9f7db6f62b02..00000000000000 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.cpp +++ /dev/null @@ -1,92 +0,0 @@ -//===-- ScriptedThreadPlanPythonInterface.cpp -----------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "lldb/Host/Config.h" -#include "lldb/Utility/Log.h" -#include "lldb/lldb-enumerations.h" - -#if LLDB_ENABLE_PYTHON - -// LLDB Python header must be included first -#include "../lldb-python.h" - -#include "../SWIGPythonBridge.h" -#include "../ScriptInterpreterPythonImpl.h" -#include "ScriptedThreadPlanPythonInterface.h" - -using namespace lldb; -using namespace lldb_private; -using namespace lldb_private::python; - -ScriptedThreadPlanPythonInterface::ScriptedThreadPlanPythonInterface( - ScriptInterpreterPythonImpl &interpreter) - : ScriptedThreadPlanInterface(), ScriptedPythonInterface(interpreter) {} - -llvm::Expected -ScriptedThreadPlanPythonInterface::CreatePluginObject( - const llvm::StringRef class_name, lldb::ThreadPlanSP thread_plan_sp, - const StructuredDataImpl &args_sp) { - return ScriptedPythonInterface::CreatePluginObject(class_name, nullptr, - thread_plan_sp, args_sp); -} - -llvm::Expected -ScriptedThreadPlanPythonInterface::ExplainsStop(Event *event) { - Status error; - StructuredData::ObjectSP obj = Dispatch("explains_stop", error, event); - - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) - return error.ToError(); - - return obj->GetBooleanValue(); -} - -llvm::Expected -ScriptedThreadPlanPythonInterface::ShouldStop(Event *event) { - Status error; - StructuredData::ObjectSP obj = Dispatch("should_stop", error, event); - - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) - return error.ToError(); - - return obj->GetBooleanValue(); -} - -llvm::Expected ScriptedThreadPlanPythonInterface::IsStale() { - Status error; - StructuredData::ObjectSP obj = Dispatch("is_stale", error); - - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) - return error.ToError(); - - return obj->GetBooleanValue(); -} - -lldb::StateType ScriptedThreadPlanPythonInterface::GetRunState() { - Status error; - StructuredData::ObjectSP obj = Dispatch("should_step", error); - - if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error)) - return lldb::eStateStepping; - - return static_cast(obj->GetUnsignedIntegerValue( - static_cast(lldb::eStateStepping))); -} - -llvm::Expected -ScriptedThreadPlanPythonInterface::GetStopDescription(lldb_private::Stream *s) { - Status error; - Dispatch("stop_description", error, s); - - if (error.Fail()) - return error.ToError(); - - return true; -} - -#endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.h deleted file mode 100644 index 2eb986e0282f0f..00000000000000 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPlanPythonInterface.h +++ /dev/null @@ -1,44 +0,0 @@ -//===-- ScriptedThreadPlanPythonInterface.h ---------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDTHREADPLANPYTHONINTERFACE_H -#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDTHREADPLANPYTHONINTERFACE_H - -#include "lldb/Host/Config.h" - -#if LLDB_ENABLE_PYTHON - -#include "ScriptedPythonInterface.h" -#include "lldb/Interpreter/Interfaces/ScriptedThreadPlanInterface.h" -#include - -namespace lldb_private { -class ScriptedThreadPlanPythonInterface : public ScriptedThreadPlanInterface, - public ScriptedPythonInterface { -public: - ScriptedThreadPlanPythonInterface(ScriptInterpreterPythonImpl &interpreter); - - llvm::Expected - CreatePluginObject(const llvm::StringRef class_name, - lldb::ThreadPlanSP thread_plan_sp, - const StructuredDataImpl &args_sp) override; - - llvm::Expected ExplainsStop(Event *event) override; - - llvm::Expected ShouldStop(Event *event) override; - - llvm::Expected IsStale() override; - - lldb::StateType GetRunState() override; - - llvm::Expected GetStopDescription(lldb_private::Stream *s) override; -}; -} // namespace lldb_private - -#endif // LLDB_ENABLE_PYTHON -#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_INTERFACES_SCRIPTEDTHREADPLANPYTHONINTERFACE_H diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp index ba2ec0e78e9f53..18e268527eb2fb 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedThreadPythonInterface.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/Host/Config.h" -#include "lldb/Target/ExecutionContext.h" #include "lldb/Utility/Log.h" #include "lldb/lldb-enumerations.h" diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h index 41f3a80a02b13b..7cdd5577919ba8 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h @@ -96,13 +96,12 @@ class SWIGBridge { static PythonObject ToSWIGWrapper(lldb::ExecutionContextRefSP ctx_sp); static PythonObject ToSWIGWrapper(const TypeSummaryOptions &summary_options); static PythonObject ToSWIGWrapper(const SymbolContext &sym_ctx); - static PythonObject ToSWIGWrapper(const Stream *stream); - static PythonObject ToSWIGWrapper(Event *event); static PythonObject ToSWIGWrapper(lldb::ProcessAttachInfoSP attach_info_sp); static PythonObject ToSWIGWrapper(lldb::ProcessLaunchInfoSP launch_info_sp); static PythonObject ToSWIGWrapper(lldb::DataExtractorSP data_extractor_sp); + static PythonObject ToSWIGWrapper(std::unique_ptr stream_sb); static PythonObject ToSWIGWrapper(std::unique_ptr data_sb); static PythonObject @@ -112,6 +111,7 @@ class SWIGBridge { static python::ScopedPythonObject ToSWIGWrapper(CommandReturnObject &cmd_retobj); + static python::ScopedPythonObject ToSWIGWrapper(Event *event); // These prototypes are the Pythonic implementations of the required // callbacks. Although these are scripting-language specific, their definition // depends on the public API. 
@@ -146,6 +146,21 @@ class SWIGBridge { const char *session_dictionary_name, lldb::DebuggerSP debugger_sp); + static python::PythonObject LLDBSwigPythonCreateScriptedThreadPlan( + const char *python_class_name, const char *session_dictionary_name, + const StructuredDataImpl &args_data, std::string &error_string, + const lldb::ThreadPlanSP &thread_plan_sp); + + static bool LLDBSWIGPythonCallThreadPlan(void *implementor, + const char *method_name, + lldb_private::Event *event_sp, + bool &got_error); + + static bool LLDBSWIGPythonCallThreadPlan(void *implementor, + const char *method_name, + lldb_private::Stream *stream, + bool &got_error); + static python::PythonObject LLDBSwigPythonCreateScriptedBreakpointResolver( const char *python_class_name, const char *session_dictionary_name, const StructuredDataImpl &args, const lldb::BreakpointSP &bkpt_sp); @@ -247,8 +262,6 @@ void *LLDBSWIGPython_CastPyObjectToSBBreakpoint(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBAttachInfo(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBLaunchInfo(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBError(PyObject *data); -void *LLDBSWIGPython_CastPyObjectToSBEvent(PyObject *data); -void *LLDBSWIGPython_CastPyObjectToSBStream(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBValue(PyObject *data); void *LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo(PyObject *data); } // namespace python diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index b71f856efda2ea..953f8b3aba18f7 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -17,7 +17,6 @@ #include "Interfaces/OperatingSystemPythonInterface.h" #include "Interfaces/ScriptedPlatformPythonInterface.h" #include "Interfaces/ScriptedProcessPythonInterface.h" -#include "Interfaces/ScriptedThreadPlanPythonInterface.h" #include "Interfaces/ScriptedThreadPythonInterface.h" #include "PythonDataObjects.h" #include "PythonReadline.h" @@ -1536,11 +1535,6 @@ ScriptInterpreterPythonImpl::CreateScriptedThreadInterface() { return std::make_shared(*this); } -ScriptedThreadPlanInterfaceSP -ScriptInterpreterPythonImpl::CreateScriptedThreadPlanInterface() { - return std::make_shared(*this); -} - OperatingSystemInterfaceSP ScriptInterpreterPythonImpl::CreateOperatingSystemInterface() { return std::make_shared(*this); @@ -1557,6 +1551,122 @@ ScriptInterpreterPythonImpl::CreateStructuredDataFromScriptObject( return py_obj.CreateStructuredObject(); } +StructuredData::ObjectSP ScriptInterpreterPythonImpl::CreateScriptedThreadPlan( + const char *class_name, const StructuredDataImpl &args_data, + std::string &error_str, lldb::ThreadPlanSP thread_plan_sp) { + if (class_name == nullptr || class_name[0] == '\0') + return StructuredData::ObjectSP(); + + if (!thread_plan_sp.get()) + return {}; + + Debugger &debugger = thread_plan_sp->GetTarget().GetDebugger(); + ScriptInterpreterPythonImpl *python_interpreter = + GetPythonInterpreter(debugger); + + if (!python_interpreter) + return {}; + + Locker py_lock(this, + Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); + PythonObject ret_val = SWIGBridge::LLDBSwigPythonCreateScriptedThreadPlan( + class_name, python_interpreter->m_dictionary_name.c_str(), args_data, + error_str, thread_plan_sp); + if (!ret_val) + return {}; + + return StructuredData::ObjectSP( + new 
StructuredPythonObject(std::move(ret_val))); +} + +bool ScriptInterpreterPythonImpl::ScriptedThreadPlanExplainsStop( + StructuredData::ObjectSP implementor_sp, Event *event, bool &script_error) { + bool explains_stop = true; + StructuredData::Generic *generic = nullptr; + if (implementor_sp) + generic = implementor_sp->GetAsGeneric(); + if (generic) { + Locker py_lock(this, + Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); + explains_stop = SWIGBridge::LLDBSWIGPythonCallThreadPlan( + generic->GetValue(), "explains_stop", event, script_error); + if (script_error) + return true; + } + return explains_stop; +} + +bool ScriptInterpreterPythonImpl::ScriptedThreadPlanShouldStop( + StructuredData::ObjectSP implementor_sp, Event *event, bool &script_error) { + bool should_stop = true; + StructuredData::Generic *generic = nullptr; + if (implementor_sp) + generic = implementor_sp->GetAsGeneric(); + if (generic) { + Locker py_lock(this, + Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); + should_stop = SWIGBridge::LLDBSWIGPythonCallThreadPlan( + generic->GetValue(), "should_stop", event, script_error); + if (script_error) + return true; + } + return should_stop; +} + +bool ScriptInterpreterPythonImpl::ScriptedThreadPlanIsStale( + StructuredData::ObjectSP implementor_sp, bool &script_error) { + bool is_stale = true; + StructuredData::Generic *generic = nullptr; + if (implementor_sp) + generic = implementor_sp->GetAsGeneric(); + if (generic) { + Locker py_lock(this, + Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); + is_stale = SWIGBridge::LLDBSWIGPythonCallThreadPlan( + generic->GetValue(), "is_stale", (Event *)nullptr, script_error); + if (script_error) + return true; + } + return is_stale; +} + +lldb::StateType ScriptInterpreterPythonImpl::ScriptedThreadPlanGetRunState( + StructuredData::ObjectSP implementor_sp, bool &script_error) { + bool should_step = false; + StructuredData::Generic *generic = nullptr; + if (implementor_sp) + generic = implementor_sp->GetAsGeneric(); + if (generic) { + Locker py_lock(this, + Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); + should_step = SWIGBridge::LLDBSWIGPythonCallThreadPlan( + generic->GetValue(), "should_step", (Event *)nullptr, script_error); + if (script_error) + should_step = true; + } + if (should_step) + return lldb::eStateStepping; + return lldb::eStateRunning; +} + +bool +ScriptInterpreterPythonImpl::ScriptedThreadPlanGetStopDescription( + StructuredData::ObjectSP implementor_sp, lldb_private::Stream *stream, + bool &script_error) { + StructuredData::Generic *generic = nullptr; + if (implementor_sp) + generic = implementor_sp->GetAsGeneric(); + if (!generic) { + script_error = true; + return false; + } + Locker py_lock(this, + Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); + return SWIGBridge::LLDBSWIGPythonCallThreadPlan( + generic->GetValue(), "stop_description", stream, script_error); +} + + StructuredData::GenericSP ScriptInterpreterPythonImpl::CreateScriptedBreakpointResolver( const char *class_name, const StructuredDataImpl &args_data, diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h index da8e3a63d08470..a33499816d8d38 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h @@ -77,9 +77,34 @@ class ScriptInterpreterPythonImpl : public 
ScriptInterpreterPython { StructuredData::GenericSP CreateScriptCommandObject(const char *class_name) override; + StructuredData::ObjectSP + CreateScriptedThreadPlan(const char *class_name, + const StructuredDataImpl &args_data, + std::string &error_str, + lldb::ThreadPlanSP thread_plan) override; + StructuredData::ObjectSP CreateStructuredDataFromScriptObject(ScriptObject obj) override; + bool ScriptedThreadPlanExplainsStop(StructuredData::ObjectSP implementor_sp, + Event *event, + bool &script_error) override; + + bool ScriptedThreadPlanShouldStop(StructuredData::ObjectSP implementor_sp, + Event *event, bool &script_error) override; + + bool ScriptedThreadPlanIsStale(StructuredData::ObjectSP implementor_sp, + bool &script_error) override; + + lldb::StateType + ScriptedThreadPlanGetRunState(StructuredData::ObjectSP implementor_sp, + bool &script_error) override; + + bool + ScriptedThreadPlanGetStopDescription(StructuredData::ObjectSP implementor_sp, + lldb_private::Stream *s, + bool &script_error) override; + StructuredData::GenericSP CreateScriptedBreakpointResolver(const char *class_name, const StructuredDataImpl &args_data, @@ -111,9 +136,6 @@ class ScriptInterpreterPythonImpl : public ScriptInterpreterPython { lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() override; - lldb::ScriptedThreadPlanInterfaceSP - CreateScriptedThreadPlanInterface() override; - lldb::OperatingSystemInterfaceSP CreateOperatingSystemInterface() override; StructuredData::ObjectSP diff --git a/lldb/source/Target/ThreadPlanPython.cpp b/lldb/source/Target/ThreadPlanPython.cpp index 48b3e3a1bcab61..d6de6b3c3cf049 100644 --- a/lldb/source/Target/ThreadPlanPython.cpp +++ b/lldb/source/Target/ThreadPlanPython.cpp @@ -32,23 +32,6 @@ ThreadPlanPython::ThreadPlanPython(Thread &thread, const char *class_name, eVoteNoOpinion, eVoteNoOpinion), m_class_name(class_name), m_args_data(args_data), m_did_push(false), m_stop_others(false) { - ScriptInterpreter *interpreter = GetScriptInterpreter(); - if (!interpreter) { - SetPlanComplete(false); - // FIXME: error handling - return; - } - - m_interface = interpreter->CreateScriptedThreadPlanInterface(); - if (!m_interface) { - SetPlanComplete(false); - // FIXME: error handling - // error.SetErrorStringWithFormat( - // "ThreadPlanPython::%s () - ERROR: %s", __FUNCTION__, - // "Script interpreter couldn't create Scripted Thread Plan Interface"); - return; - } - SetIsControllingPlan(true); SetOkayToDiscard(true); SetPrivate(false); @@ -77,14 +60,13 @@ void ThreadPlanPython::DidPush() { // We set up the script side in DidPush, so that it can push other plans in // the constructor, and doesn't have to care about the details of DidPush. 
m_did_push = true; - if (m_interface) { - auto obj_or_err = m_interface->CreatePluginObject( - m_class_name, this->shared_from_this(), m_args_data); - if (!obj_or_err) { - m_error_str = llvm::toString(obj_or_err.takeError()); - SetPlanComplete(false); - } else - m_implementation_sp = *obj_or_err; + if (!m_class_name.empty()) { + ScriptInterpreter *script_interp = GetScriptInterpreter(); + if (script_interp) { + m_implementation_sp = script_interp->CreateScriptedThreadPlan( + m_class_name.c_str(), m_args_data, m_error_str, + this->shared_from_this()); + } } } @@ -95,13 +77,14 @@ bool ThreadPlanPython::ShouldStop(Event *event_ptr) { bool should_stop = true; if (m_implementation_sp) { - auto should_stop_or_err = m_interface->ShouldStop(event_ptr); - if (!should_stop_or_err) { - LLDB_LOG_ERROR(GetLog(LLDBLog::Thread), should_stop_or_err.takeError(), - "Can't call ScriptedThreadPlan::ShouldStop."); - SetPlanComplete(false); - } else - should_stop = *should_stop_or_err; + ScriptInterpreter *script_interp = GetScriptInterpreter(); + if (script_interp) { + bool script_error; + should_stop = script_interp->ScriptedThreadPlanShouldStop( + m_implementation_sp, event_ptr, script_error); + if (script_error) + SetPlanComplete(false); + } } return should_stop; } @@ -113,13 +96,14 @@ bool ThreadPlanPython::IsPlanStale() { bool is_stale = true; if (m_implementation_sp) { - auto is_stale_or_err = m_interface->IsStale(); - if (!is_stale_or_err) { - LLDB_LOG_ERROR(GetLog(LLDBLog::Thread), is_stale_or_err.takeError(), - "Can't call ScriptedThreadPlan::IsStale."); - SetPlanComplete(false); - } else - is_stale = *is_stale_or_err; + ScriptInterpreter *script_interp = GetScriptInterpreter(); + if (script_interp) { + bool script_error; + is_stale = script_interp->ScriptedThreadPlanIsStale(m_implementation_sp, + script_error); + if (script_error) + SetPlanComplete(false); + } } return is_stale; } @@ -131,14 +115,14 @@ bool ThreadPlanPython::DoPlanExplainsStop(Event *event_ptr) { bool explains_stop = true; if (m_implementation_sp) { - auto explains_stop_or_error = m_interface->ExplainsStop(event_ptr); - if (!explains_stop_or_error) { - LLDB_LOG_ERROR(GetLog(LLDBLog::Thread), - explains_stop_or_error.takeError(), - "Can't call ScriptedThreadPlan::ExplainsStop."); - SetPlanComplete(false); - } else - explains_stop = *explains_stop_or_error; + ScriptInterpreter *script_interp = GetScriptInterpreter(); + if (script_interp) { + bool script_error; + explains_stop = script_interp->ScriptedThreadPlanExplainsStop( + m_implementation_sp, event_ptr, script_error); + if (script_error) + SetPlanComplete(false); + } } return explains_stop; } @@ -166,8 +150,14 @@ lldb::StateType ThreadPlanPython::GetPlanRunState() { LLDB_LOGF(log, "%s called on Python Thread Plan: %s )", LLVM_PRETTY_FUNCTION, m_class_name.c_str()); lldb::StateType run_state = eStateRunning; - if (m_implementation_sp) - run_state = m_interface->GetRunState(); + if (m_implementation_sp) { + ScriptInterpreter *script_interp = GetScriptInterpreter(); + if (script_interp) { + bool script_error; + run_state = script_interp->ScriptedThreadPlanGetRunState( + m_implementation_sp, script_error); + } + } return run_state; } @@ -178,13 +168,12 @@ void ThreadPlanPython::GetDescription(Stream *s, lldb::DescriptionLevel level) { if (m_implementation_sp) { ScriptInterpreter *script_interp = GetScriptInterpreter(); if (script_interp) { - auto desc_or_err = m_interface->GetStopDescription(s); - if (!desc_or_err || !*desc_or_err) { - LLDB_LOG_ERROR(GetLog(LLDBLog::Thread), 
desc_or_err.takeError(), - "Can't call ScriptedThreadPlan::GetStopDescription."); + bool script_error; + bool added_desc = script_interp->ScriptedThreadPlanGetStopDescription( + m_implementation_sp, s, script_error); + if (script_error || !added_desc) s->Printf("Python thread plan implemented by class %s.", m_class_name.c_str()); - } } return; } diff --git a/lldb/test/API/functionalities/step_scripted/Steps.py b/lldb/test/API/functionalities/step_scripted/Steps.py index 3325dba7536571..7527607be847a5 100644 --- a/lldb/test/API/functionalities/step_scripted/Steps.py +++ b/lldb/test/API/functionalities/step_scripted/Steps.py @@ -47,7 +47,7 @@ def queue_child_thread_plan(self): # This plan does a step-over until a variable changes value. class StepUntil(StepWithChild): - def __init__(self, thread_plan, args_data): + def __init__(self, thread_plan, args_data, dict): self.thread_plan = thread_plan self.frame = thread_plan.GetThread().frames[0] self.target = thread_plan.GetThread().GetProcess().GetTarget() @@ -99,7 +99,7 @@ def stop_description(self, stream): class StepReportsStopOthers: stop_mode_dict = {} - def __init__(self, thread_plan, args_data): + def __init__(self, thread_plan, args_data, dict): self.thread_plan = thread_plan self.key = str(args_data.GetValueForKey("token").GetUnsignedIntegerValue(1000)) diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp index 72dcf45a867e50..7f3359f6bf26b2 100644 --- a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp +++ b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp @@ -96,6 +96,26 @@ lldb_private::python::SWIGBridge::LLDBSwigPythonCreateCommandObject( return python::PythonObject(); } +python::PythonObject +lldb_private::python::SWIGBridge::LLDBSwigPythonCreateScriptedThreadPlan( + const char *python_class_name, const char *session_dictionary_name, + const StructuredDataImpl &args_data, std::string &error_string, + const lldb::ThreadPlanSP &thread_plan_sp) { + return python::PythonObject(); +} + +bool lldb_private::python::SWIGBridge::LLDBSWIGPythonCallThreadPlan( + void *implementor, const char *method_name, Event *event_sp, + bool &got_error) { + return false; +} + +bool lldb_private::python::SWIGBridge::LLDBSWIGPythonCallThreadPlan( + void *implementor, const char *method_name, Stream *event_sp, + bool &got_error) { + return false; +} + python::PythonObject lldb_private::python::SWIGBridge:: LLDBSwigPythonCreateScriptedBreakpointResolver( const char *python_class_name, const char *session_dictionary_name, @@ -150,16 +170,6 @@ lldb_private::python::LLDBSWIGPython_CastPyObjectToSBError(PyObject *data) { return nullptr; } -void * -lldb_private::python::LLDBSWIGPython_CastPyObjectToSBEvent(PyObject *data) { - return nullptr; -} - -void * -lldb_private::python::LLDBSWIGPython_CastPyObjectToSBStream(PyObject *data) { - return nullptr; -} - void * lldb_private::python::LLDBSWIGPython_CastPyObjectToSBValue(PyObject *data) { return nullptr; @@ -309,11 +319,6 @@ lldb_private::python::SWIGBridge::ToSWIGWrapper(lldb::ExecutionContextRefSP) { return python::PythonObject(); } -python::PythonObject -lldb_private::python::SWIGBridge::ToSWIGWrapper(lldb::ThreadPlanSP) { - return python::PythonObject(); -} - python::PythonObject lldb_private::python::SWIGBridge::ToSWIGWrapper(lldb::ProcessSP) { return python::PythonObject(); @@ -323,13 +328,3 @@ python::PythonObject lldb_private::python::SWIGBridge::ToSWIGWrapper( const lldb_private::StructuredDataImpl &) { 
return python::PythonObject(); } - -python::PythonObject -lldb_private::python::SWIGBridge::ToSWIGWrapper(Event *event) { - return python::PythonObject(); -} - -python::PythonObject -lldb_private::python::SWIGBridge::ToSWIGWrapper(const Stream *stream) { - return python::PythonObject(); -} From 7fbd427f5ebea4a4ebf25747758851875bb7e173 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 30 Oct 2023 17:46:18 -0700 Subject: [PATCH 105/144] =?UTF-8?q?Add=20the=20ability=20to=20get=20a=20C+?= =?UTF-8?q?+=20vtable=20ValueObject=20from=20another=20ValueObj=E2=80=A6?= =?UTF-8?q?=20(#67599)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the ability to get a C++ vtable ValueObject from another ValueObject. This patch adds the ability to ask a ValueObject for a ValueObject that represents the virtual function table for a C++ class. If the ValueObject is not a C++ class with a vtable, a valid ValueObject value will be returned that contains an appropriate error. If it is successful, a valid ValueObject that represents the vtable will be returned. The ValueObject that is returned will have a name that matches the demangled value for a C++ vtable mangled name like "vtable for ". It will have N children, one for each virtual function pointer. Each child's value is the function pointer itself, the summary is the symbolication of this function pointer, and the type will be a valid function pointer from the debug info if there is debug information corresponding to the virtual function pointer. The vtable SBValue will have the following: - SBValue::GetName() returns "vtable for " - SBValue::GetValue() returns a string representation of the vtable address - SBValue::GetSummary() returns NULL - SBValue::GetType() returns a type appropriate for a uintptr_t type for the current process - SBValue::GetLoadAddress() returns the address of the vtable pointer - SBValue::GetValueAsUnsigned(...) returns the vtable address - SBValue::GetNumChildren() returns the number of virtual function pointers in the vtable - SBValue::GetChildAtIndex(...) returns an SBValue that represents a virtual function pointer The child SBValue objects that represent a virtual function pointer have the following values: - SBValue::GetName() returns "[%u]" where %u is the vtable function pointer index - SBValue::GetValue() returns a string representation of the virtual function pointer - SBValue::GetSummary() returns a symbolicated representation of the virtual function pointer - SBValue::GetType() returns the function prototype type if there is debug info, or a generic function prototype if there is no debug info - SBValue::GetLoadAddress() returns the address of the virtual function pointer - SBValue::GetValueAsUnsigned(...) returns the virtual function pointer - SBValue::GetNumChildren() returns 0 - SBValue::GetChildAtIndex(...) returns an invalid SBValue for any index
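For illustration, here is a short sketch that walks a vtable using only the behavior documented above (the `shape_ptr` variable name is assumed, borrowed from the session below; any C++ object with a vtable works the same way):

```
# Sketch: assumes we are stopped in a frame that has a C++ object
# pointer named "shape_ptr" (see the session below).
vtable = lldb.frame.FindVariable("shape_ptr").GetVTable()
if vtable.GetError().Fail():
    # Not a C++ class with a vtable; the error describes why.
    print(vtable.GetError())
else:
    print(vtable.GetName())  # e.g. "vtable for Shape"
    for idx in range(vtable.GetNumChildren()):
        entry = vtable.GetChildAtIndex(idx)
        # The value is the virtual function pointer and the summary is
        # its symbolicated description.
        print(entry.GetName(), entry.GetValue(), entry.GetSummary())
```

Because GetVTable() returns a valid SBValue carrying an error when the value is not a C++ class with a vtable, the sketch degrades gracefully: the loop is skipped and the error is printed instead.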
Examples of using this API via python: ``` (lldb) script vtable = lldb.frame.FindVariable("shape_ptr").GetVTable() (lldb) script vtable vtable for Shape = 0x0000000100004088 { [0] = 0x0000000100003d20 a.out`Shape::~Shape() at main.cpp:3 [1] = 0x0000000100003e4c a.out`Shape::~Shape() at main.cpp:3 [2] = 0x0000000100003e7c a.out`Shape::area() at main.cpp:4 [3] = 0x0000000100003e3c a.out`Shape::optional() at main.cpp:7 } (lldb) script c = vtable.GetChildAtIndex(0) (lldb) script c (void ()) [0] = 0x0000000100003d20 a.out`Shape::~Shape() at main.cpp:3 ``` --- lldb/bindings/interface/SBTypeDocstrings.i | 13 - lldb/include/lldb/API/SBValue.h | 46 +++ lldb/include/lldb/Core/ValueObject.h | 4 + lldb/include/lldb/Core/ValueObjectChild.h | 1 + lldb/include/lldb/Core/ValueObjectVTable.h | 105 +++++ lldb/include/lldb/Symbol/Type.h | 2 + lldb/include/lldb/Symbol/TypeSystem.h | 4 + lldb/include/lldb/Target/LanguageRuntime.h | 26 ++ lldb/include/lldb/lldb-enumerations.h | 4 +- lldb/source/API/SBValue.cpp | 17 +- lldb/source/Commands/CommandObjectFrame.cpp | 2 + lldb/source/Core/CMakeLists.txt | 1 + lldb/source/Core/ValueObject.cpp | 5 + lldb/source/Core/ValueObjectVTable.cpp | 274 +++++++++++++ .../DataFormatters/CXXFunctionPointer.cpp | 6 +- .../Language/CPlusPlus/CPlusPlusLanguage.cpp | 4 +- .../ItaniumABI/ItaniumABILanguageRuntime.cpp | 387 +++++++++++------- .../ItaniumABI/ItaniumABILanguageRuntime.h | 27 +- .../TypeSystem/Clang/TypeSystemClang.cpp | 28 +- .../TypeSystem/Clang/TypeSystemClang.h | 4 + lldb/source/Symbol/Type.cpp | 4 + lldb/test/API/functionalities/vtable/Makefile | 3 + .../functionalities/vtable/TestVTableValue.py | 186 +++++++++ lldb/test/API/functionalities/vtable/main.cpp | 38 ++ 24 files changed, 1013 insertions(+), 178 deletions(-) create mode 100644 lldb/include/lldb/Core/ValueObjectVTable.h create mode 100644 lldb/source/Core/ValueObjectVTable.cpp create mode 100644 lldb/test/API/functionalities/vtable/Makefile create mode 100644 lldb/test/API/functionalities/vtable/TestVTableValue.py create mode 100644 lldb/test/API/functionalities/vtable/main.cpp diff --git a/lldb/bindings/interface/SBTypeDocstrings.i b/lldb/bindings/interface/SBTypeDocstrings.i index c49e9647ba0463..b056354922094f 100644 --- a/lldb/bindings/interface/SBTypeDocstrings.i +++ b/lldb/bindings/interface/SBTypeDocstrings.i @@ -139,19 +139,6 @@ SBType supports the eq/ne operator. For example,:: " ) lldb::SBType::IsReferenceType; -%feature("docstring", - "Returns true if this type is a function type. - - Language-specific behaviour: - - * C: Returns true for types that represent functions. Note that function - pointers are not function types (but their `GetPointeeType()` are function - types). - * C++: Same as in C. - * Objective-C: Returns false for all types. - " -) lldb::SBType::IsPolymorphicClass; - %feature("docstring", "Returns true if this type is a polymorphic type. diff --git a/lldb/include/lldb/API/SBValue.h b/lldb/include/lldb/API/SBValue.h index b66c2d5642b6f9..bbcccaab51aaee 100644 --- a/lldb/include/lldb/API/SBValue.h +++ b/lldb/include/lldb/API/SBValue.h @@ -374,6 +374,52 @@ class LLDB_API SBValue { lldb::SBWatchpoint WatchPointee(bool resolve_location, bool read, bool write, SBError &error); + /// If this value represents a C++ class that has a vtable, return a value + /// that represents the virtual function table.
+ /// + /// SBValue::GetError() will be in the success state if this value represents + /// a C++ class with a vtable, or an appropriate error describing that the + /// object isn't a C++ class with a vtable or not a C++ class. + /// + /// SBValue::GetName() will be the demangled symbol name for the virtual + /// function table like "vtable for ". + /// + /// SBValue::GetValue() will be the address of the first vtable entry if the + /// current SBValue is a class with a vtable, or nothing if the current SBValue + /// is not a C++ class or not a C++ class that has a vtable. + /// + /// SBValue::GetValueAsUnsigned(...) will return the address of the first + /// vtable entry. + /// + /// SBValue::GetLoadAddress() will return the address of the vtable pointer + /// found in the parent SBValue. + /// + /// SBValue::GetNumChildren() will return the number of virtual function + /// pointers in the vtable, or zero on error. + /// + /// SBValue::GetChildAtIndex(...) will return each virtual function pointer + /// as an SBValue object. + /// + /// The child SBValue objects will have the following values: + /// + /// SBValue::GetError() will indicate success if the vtable entry was + /// successfully read from memory, or an error if not. + /// + /// SBValue::GetName() will be the vtable function index in the form "[%u]" + /// where %u is the index. + /// + /// SBValue::GetValue() will be the virtual function pointer value as a + /// string. + /// + /// SBValue::GetValueAsUnsigned(...) will return the virtual function + /// pointer value. + /// + /// SBValue::GetLoadAddress() will return the address of the virtual function + /// pointer. + /// + /// SBValue::GetNumChildren() returns 0 + lldb::SBValue GetVTable(); + protected: friend class SBBlock; friend class SBFrame; diff --git a/lldb/include/lldb/Core/ValueObject.h b/lldb/include/lldb/Core/ValueObject.h index 3af94f0a86e2fc..20b3086138457f 100644 --- a/lldb/include/lldb/Core/ValueObject.h +++ b/lldb/include/lldb/Core/ValueObject.h @@ -620,6 +620,10 @@ class ValueObject { virtual lldb::ValueObjectSP CastPointerType(const char *name, lldb::TypeSP &type_sp); + /// If this object represents a C++ class with a vtable, return an object + /// that represents the virtual function table. If the object isn't a class + /// with a vtable, return a valid ValueObject with the error set correctly. + lldb::ValueObjectSP GetVTable(); // The backing bits of this value object were updated, clear any descriptive // string, so we know we have to refetch them. void ValueUpdated() { diff --git a/lldb/include/lldb/Core/ValueObjectChild.h b/lldb/include/lldb/Core/ValueObjectChild.h index 07b37aa8a405f7..46b14e6840f0dc 100644 --- a/lldb/include/lldb/Core/ValueObjectChild.h +++ b/lldb/include/lldb/Core/ValueObjectChild.h @@ -73,6 +73,7 @@ class ValueObjectChild : public ValueObject { friend class ValueObject; friend class ValueObjectConstResult; friend class ValueObjectConstResultImpl; + friend class ValueObjectVTable; ValueObjectChild(ValueObject &parent, const CompilerType &compiler_type, ConstString name, uint64_t byte_size, diff --git a/lldb/include/lldb/Core/ValueObjectVTable.h b/lldb/include/lldb/Core/ValueObjectVTable.h new file mode 100644 index 00000000000000..217ff8d0d334ce --- /dev/null +++ b/lldb/include/lldb/Core/ValueObjectVTable.h @@ -0,0 +1,105 @@ +//===-- ValueObjectVTable.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_CORE_VALUEOBJECTVTABLE_H +#define LLDB_CORE_VALUEOBJECTVTABLE_H + +#include "lldb/Core/ValueObject.h" + +namespace lldb_private { + +/// A class that represents a virtual function table for a C++ class. +/// +/// ValueObject::GetError() will be in the success state if this value +/// represents a C++ class with a vtable, or an appropriate error describing +/// that the object isn't a C++ class with a vtable or not a C++ class. +/// +/// ValueObject::GetName() will be the demangled symbol name for the virtual +/// function table like "vtable for ". +/// +/// ValueObject::GetValueAsCString() will be the address of the first vtable +/// entry if the current ValueObject is a class with a vtable, or nothing if the +/// current ValueObject is not a C++ class or not a C++ class that has a +/// vtable. +/// +/// ValueObject::GetValueAsUnsigned(...) will return the address of the first +/// vtable entry. +/// +/// ValueObject::GetAddressOf() will return the address of the vtable pointer +/// found in the parent ValueObject. +/// +/// ValueObject::GetNumChildren() will return the number of virtual function +/// pointers in the vtable, or zero on error. +/// +/// ValueObject::GetChildAtIndex(...) will return each virtual function pointer +/// as a ValueObject object. +/// +/// The child ValueObjects will have the following values: +/// +/// ValueObject::GetError() will indicate success if the vtable entry was +/// successfully read from memory, or an error if not. +/// +/// ValueObject::GetName() will be the vtable function index in the form "[%u]" +/// where %u is the index. +/// +/// ValueObject::GetValueAsCString() will be the virtual function pointer value. +/// +/// ValueObject::GetValueAsUnsigned(...) will return the virtual function +/// pointer value. +/// +/// ValueObject::GetAddressOf() will return the address of the virtual function +/// pointer. +/// +/// ValueObject::GetNumChildren() returns 0 +class ValueObjectVTable : public ValueObject { +public: + ~ValueObjectVTable() override; + + static lldb::ValueObjectSP Create(ValueObject &parent); + + std::optional<uint64_t> GetByteSize() override; + + size_t CalculateNumChildren(uint32_t max) override; + + ValueObject *CreateChildAtIndex(size_t idx, bool synthetic_array_member, + int32_t synthetic_index) override; + + lldb::ValueType GetValueType() const override; + + ConstString GetTypeName() override; + + ConstString GetQualifiedTypeName() override; + + ConstString GetDisplayTypeName() override; + + bool IsInScope() override; + +protected: + bool UpdateValue() override; + + CompilerType GetCompilerTypeImpl() override; + + /// The symbol for the C++ virtual function table. + const Symbol *m_vtable_symbol = nullptr; + /// Cache the number of vtable children when we update the value. + uint32_t m_num_vtable_entries = 0; + /// Cache the address size in bytes to avoid checking with the process too + /// many times.
+ uint32_t m_addr_size = 0; + +private: + ValueObjectVTable(ValueObject &parent); + + // For ValueObject only + ValueObjectVTable(const ValueObjectVTable &) = delete; + const ValueObjectVTable &operator=(const ValueObjectVTable &) = delete; +}; + +} // namespace lldb_private + +#endif // LLDB_CORE_VALUEOBJECTVTABLE_H diff --git a/lldb/include/lldb/Symbol/Type.h b/lldb/include/lldb/Symbol/Type.h index c5ef1f53021207..15edbea3cc7ae7 100644 --- a/lldb/include/lldb/Symbol/Type.h +++ b/lldb/include/lldb/Symbol/Type.h @@ -420,6 +420,8 @@ class TypeAndOrName { void SetName(const char *type_name_cstr); + void SetName(llvm::StringRef name); + void SetTypeSP(lldb::TypeSP type_sp); void SetCompilerType(CompilerType compiler_type); diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index 5ac16be3347ff8..cd5004a3f34db4 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -437,6 +437,10 @@ class TypeSystem : public PluginInterface, virtual CompilerType GetBasicTypeFromAST(lldb::BasicType basic_type) = 0; + virtual CompilerType CreateGenericFunctionPrototype() { + return CompilerType(); + } + virtual CompilerType GetBuiltinTypeForEncodingAndBitSize(lldb::Encoding encoding, size_t bit_size) = 0; diff --git a/lldb/include/lldb/Target/LanguageRuntime.h b/lldb/include/lldb/Target/LanguageRuntime.h index eff79a0bf0d062..a2a9c0163f082f 100644 --- a/lldb/include/lldb/Target/LanguageRuntime.h +++ b/lldb/include/lldb/Target/LanguageRuntime.h @@ -78,6 +78,32 @@ class LanguageRuntime : public Runtime, public PluginInterface { virtual bool GetObjectDescription(Stream &str, Value &value, ExecutionContextScope *exe_scope) = 0; + + struct VTableInfo { + Address addr; /// Address of the vtable's virtual function table + Symbol *symbol; /// The vtable symbol from the symbol table + }; + /// Get the vtable information for a given value. + /// + /// \param[in] in_value + /// The value object to try and extract the VTableInfo from. + /// + /// \param[in] check_type + /// If true, the compiler type of \a in_value will be checked to see if + /// it is an instance to, or pointer or reference to a class or struct + /// that has a vtable. If the type doesn't meet the requirements, an + /// error will be returned explaining why the type isn't suitable. + /// + /// \return + /// An error if anything goes wrong while trying to extract the vtable + /// or if \a check_type is true and the type doesn't have a vtable. 
+ virtual llvm::Expected GetVTableInfo(ValueObject &in_value, + bool check_type) { + return llvm::createStringError( + std::errc::invalid_argument, + "language doesn't support getting vtable information"); + } + // this call should return true if it could set the name and/or the type virtual bool GetDynamicTypeAndAddress(ValueObject &in_value, lldb::DynamicValueType use_dynamic, diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index 206ff4ed7e6ad0..633a3ee696c208 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -322,7 +322,9 @@ enum ValueType { eValueTypeRegister = 5, ///< stack frame register value eValueTypeRegisterSet = 6, ///< A collection of stack frame register values eValueTypeConstResult = 7, ///< constant result variables - eValueTypeVariableThreadLocal = 8 ///< thread local storage variable + eValueTypeVariableThreadLocal = 8, ///< thread local storage variable + eValueTypeVTable = 9, ///< virtual function table + eValueTypeVTableEntry = 10, ///< function pointer in virtual function table }; /// Token size/granularities for Input Readers. diff --git a/lldb/source/API/SBValue.cpp b/lldb/source/API/SBValue.cpp index e14f1196c6316b..34d01d759ba55a 100644 --- a/lldb/source/API/SBValue.cpp +++ b/lldb/source/API/SBValue.cpp @@ -114,7 +114,7 @@ class ValueImpl { Target *target = value_sp->GetTargetSP().get(); // If this ValueObject holds an error, then it is valuable for that. - if (value_sp->GetError().Fail()) + if (value_sp->GetError().Fail()) return value_sp; if (!target) @@ -1038,8 +1038,8 @@ lldb::ValueObjectSP SBValue::GetSP(ValueLocker &locker) const { // IsValid means that the SBValue has a value in it. But that's not the // only time that ValueObjects are useful. We also want to return the value // if there's an error state in it. 
- if (!m_opaque_sp || (!m_opaque_sp->IsValid() - && (m_opaque_sp->GetRootSP() + if (!m_opaque_sp || (!m_opaque_sp->IsValid() + && (m_opaque_sp->GetRootSP() && !m_opaque_sp->GetRootSP()->GetError().Fail()))) { locker.GetError().SetErrorString("No value"); return ValueObjectSP(); @@ -1505,3 +1505,14 @@ lldb::SBValue SBValue::Persist() { } return persisted_sb; } + +lldb::SBValue SBValue::GetVTable() { + SBValue vtable_sb; + ValueLocker locker; + lldb::ValueObjectSP value_sp(GetSP(locker)); + if (!value_sp) + return vtable_sb; + + vtable_sb.SetSP(value_sp->GetVTable()); + return vtable_sb; +} diff --git a/lldb/source/Commands/CommandObjectFrame.cpp b/lldb/source/Commands/CommandObjectFrame.cpp index 1fad638f214536..e7cb861c2b01c6 100644 --- a/lldb/source/Commands/CommandObjectFrame.cpp +++ b/lldb/source/Commands/CommandObjectFrame.cpp @@ -490,6 +490,8 @@ may even involve JITing and running code in the target program.)"); case eValueTypeRegisterSet: case eValueTypeConstResult: case eValueTypeVariableThreadLocal: + case eValueTypeVTable: + case eValueTypeVTableEntry: return false; } } diff --git a/lldb/source/Core/CMakeLists.txt b/lldb/source/Core/CMakeLists.txt index 9073e3e9b2ee33..10525ac39e6ef5 100644 --- a/lldb/source/Core/CMakeLists.txt +++ b/lldb/source/Core/CMakeLists.txt @@ -71,6 +71,7 @@ add_lldb_library(lldbCore ValueObjectSyntheticFilter.cpp ValueObjectUpdater.cpp ValueObjectVariable.cpp + ValueObjectVTable.cpp DEPENDS clang-tablegen-targets diff --git a/lldb/source/Core/ValueObject.cpp b/lldb/source/Core/ValueObject.cpp index ebfc1cf4d6fe9e..bdb1bef633d8fb 100644 --- a/lldb/source/Core/ValueObject.cpp +++ b/lldb/source/Core/ValueObject.cpp @@ -17,6 +17,7 @@ #include "lldb/Core/ValueObjectDynamicValue.h" #include "lldb/Core/ValueObjectMemory.h" #include "lldb/Core/ValueObjectSyntheticFilter.h" +#include "lldb/Core/ValueObjectVTable.h" #include "lldb/DataFormatters/DataVisualization.h" #include "lldb/DataFormatters/DumpValueObjectOptions.h" #include "lldb/DataFormatters/FormatManager.h" @@ -3155,3 +3156,7 @@ ValueObjectSP ValueObject::Persist() { return persistent_var_sp->GetValueObject(); } + +lldb::ValueObjectSP ValueObject::GetVTable() { + return ValueObjectVTable::Create(*this); +} diff --git a/lldb/source/Core/ValueObjectVTable.cpp b/lldb/source/Core/ValueObjectVTable.cpp new file mode 100644 index 00000000000000..177ae4167a1d45 --- /dev/null +++ b/lldb/source/Core/ValueObjectVTable.cpp @@ -0,0 +1,274 @@ +//===-- ValueObjectVTable.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Core/ValueObjectVTable.h" +#include "lldb/Core/Module.h" +#include "lldb/Core/ValueObjectChild.h" +#include "lldb/Symbol/Function.h" +#include "lldb/Target/Language.h" +#include "lldb/Target/LanguageRuntime.h" +#include "lldb/lldb-defines.h" +#include "lldb/lldb-enumerations.h" +#include "lldb/lldb-forward.h" +#include "lldb/lldb-private-enumerations.h" + +using namespace lldb; +using namespace lldb_private; + +class ValueObjectVTableChild : public ValueObject { +public: + ValueObjectVTableChild(ValueObject &parent, uint32_t func_idx, + uint64_t addr_size) + : ValueObject(parent), m_func_idx(func_idx), m_addr_size(addr_size) { + SetFormat(eFormatPointer); + SetName(ConstString(llvm::formatv("[{0}]", func_idx).str())); + } + + ~ValueObjectVTableChild() override = default; + + std::optional<uint64_t> GetByteSize() override { return m_addr_size; }; + + size_t CalculateNumChildren(uint32_t max) override { return 0; }; + + ValueType GetValueType() const override { return eValueTypeVTableEntry; }; + + bool IsInScope() override { + if (ValueObject *parent = GetParent()) + return parent->IsInScope(); + return false; + }; + +protected: + bool UpdateValue() override { + SetValueIsValid(false); + m_value.Clear(); + ValueObject *parent = GetParent(); + if (!parent) { + m_error.SetErrorString("owning vtable object not valid"); + return false; + } + + addr_t parent_addr = parent->GetValueAsUnsigned(LLDB_INVALID_ADDRESS); + if (parent_addr == LLDB_INVALID_ADDRESS) { + m_error.SetErrorString("invalid vtable address"); + return false; + } + + ProcessSP process_sp = GetProcessSP(); + if (!process_sp) { + m_error.SetErrorString("no process"); + return false; + } + + TargetSP target_sp = GetTargetSP(); + if (!target_sp) { + m_error.SetErrorString("no target"); + return false; + } + + // Each `vtable_entry_addr` points to the function pointer. + addr_t vtable_entry_addr = parent_addr + m_func_idx * m_addr_size; + addr_t vfunc_ptr = + process_sp->ReadPointerFromMemory(vtable_entry_addr, m_error); + if (m_error.Fail()) { + m_error.SetErrorStringWithFormat( + "failed to read virtual function entry 0x%16.16" PRIx64, + vtable_entry_addr); + return false; + } + + + // Set our value to be the load address of the function pointer in memory + // and our type to be the function pointer type. + m_value.SetValueType(Value::ValueType::LoadAddress); + m_value.GetScalar() = vtable_entry_addr; + + // See if our resolved address points to a function in the debug info. If + // it does, then we can report the type as a function prototype for this + // function. + Function *function = nullptr; + Address resolved_vfunc_ptr_address; + target_sp->ResolveLoadAddress(vfunc_ptr, resolved_vfunc_ptr_address); + if (resolved_vfunc_ptr_address.IsValid()) + function = resolved_vfunc_ptr_address.CalculateSymbolContextFunction(); + if (function) { + m_value.SetCompilerType(function->GetCompilerType().GetPointerType()); + } else { + // Set our value's compiler type to a generic function prototype so that + // it displays as a hex function pointer for the value and the summary + // will display the address description. + + // Get the original type that this vtable is based off of so we can get + // the language from it correctly. + ValueObject *val = parent->GetParent(); + auto type_system = target_sp->GetScratchTypeSystemForLanguage( + val ?
val->GetObjectRuntimeLanguage() : eLanguageTypeC_plus_plus); + if (type_system) { + m_value.SetCompilerType( + (*type_system)->CreateGenericFunctionPrototype().GetPointerType()); + } else { + consumeError(type_system.takeError()); + } + } + + // Now read our value into m_data so that we can use the default + // summary provider for C++ for function pointers which will get the + // address description for our function pointer. + if (m_error.Success()) { + const bool thread_and_frame_only_if_stopped = true; + ExecutionContext exe_ctx( + GetExecutionContextRef().Lock(thread_and_frame_only_if_stopped)); + m_error = m_value.GetValueAsData(&exe_ctx, m_data, GetModule().get()); + } + SetValueDidChange(true); + SetValueIsValid(true); + return true; + }; + + CompilerType GetCompilerTypeImpl() override { + return m_value.GetCompilerType(); + }; + + const uint32_t m_func_idx; + const uint64_t m_addr_size; + +private: + // For ValueObject only + ValueObjectVTableChild(const ValueObjectVTableChild &) = delete; + const ValueObjectVTableChild & + operator=(const ValueObjectVTableChild &) = delete; +}; + +ValueObjectSP ValueObjectVTable::Create(ValueObject &parent) { + return (new ValueObjectVTable(parent))->GetSP(); +} + +ValueObjectVTable::ValueObjectVTable(ValueObject &parent) + : ValueObject(parent) { + SetFormat(eFormatPointer); +} + +std::optional<uint64_t> ValueObjectVTable::GetByteSize() { + if (m_vtable_symbol) + return m_vtable_symbol->GetByteSize(); + return std::nullopt; +} + +size_t ValueObjectVTable::CalculateNumChildren(uint32_t max) { + if (UpdateValueIfNeeded(false)) + return m_num_vtable_entries <= max ? m_num_vtable_entries : max; + return 0; +} + +ValueType ValueObjectVTable::GetValueType() const { return eValueTypeVTable; } + +ConstString ValueObjectVTable::GetTypeName() { + if (m_vtable_symbol) + return m_vtable_symbol->GetName(); + return ConstString(); +} + +ConstString ValueObjectVTable::GetQualifiedTypeName() { return GetTypeName(); } + +ConstString ValueObjectVTable::GetDisplayTypeName() { + if (m_vtable_symbol) + return m_vtable_symbol->GetDisplayName(); + return ConstString(); +} + +bool ValueObjectVTable::IsInScope() { return GetParent()->IsInScope(); } + +ValueObject *ValueObjectVTable::CreateChildAtIndex(size_t idx, + bool synthetic_array_member, + int32_t synthetic_index) { + if (synthetic_array_member) + return nullptr; + return new ValueObjectVTableChild(*this, idx, m_addr_size); +} + +bool ValueObjectVTable::UpdateValue() { + m_error.Clear(); + m_flags.m_children_count_valid = false; + SetValueIsValid(false); + m_num_vtable_entries = 0; + ValueObject *parent = GetParent(); + if (!parent) { + m_error.SetErrorString("no parent object"); + return false; + } + + ProcessSP process_sp = GetProcessSP(); + if (!process_sp) { + m_error.SetErrorString("no process"); + return false; + } + + const LanguageType language = parent->GetObjectRuntimeLanguage(); + LanguageRuntime *language_runtime = process_sp->GetLanguageRuntime(language); + + if (language_runtime == nullptr) { + m_error.SetErrorStringWithFormat( + "no language runtime support for the language \"%s\"", + Language::GetNameForLanguageType(language)); + return false; + } + + // Get the vtable information from the language runtime.
+  llvm::Expected<LanguageRuntime::VTableInfo> vtable_info_or_err =
+      language_runtime->GetVTableInfo(*parent, /*check_type=*/true);
+  if (!vtable_info_or_err) {
+    m_error = vtable_info_or_err.takeError();
+    return false;
+  }
+
+  TargetSP target_sp = GetTargetSP();
+  const addr_t vtable_start_addr =
+      vtable_info_or_err->addr.GetLoadAddress(target_sp.get());
+
+  m_vtable_symbol = vtable_info_or_err->symbol;
+  if (!m_vtable_symbol) {
+    m_error.SetErrorStringWithFormat(
+        "no vtable symbol found containing 0x%" PRIx64, vtable_start_addr);
+    return false;
+  }
+
+  // Now that we know it's a vtable, we update the object's state.
+  SetName(GetTypeName());
+
+  // Calculate the number of entries
+  if (!m_vtable_symbol->GetByteSizeIsValid()) {
+    m_error.SetErrorStringWithFormat(
+        "vtable symbol \"%s\" doesn't have a valid size",
+        m_vtable_symbol->GetMangled().GetDemangledName().GetCString());
+    return false;
+  }
+
+  m_addr_size = process_sp->GetAddressByteSize();
+  const addr_t vtable_end_addr =
+      m_vtable_symbol->GetLoadAddress(target_sp.get()) +
+      m_vtable_symbol->GetByteSize();
+  m_num_vtable_entries = (vtable_end_addr - vtable_start_addr) / m_addr_size;
+
+  m_value.SetValueType(Value::ValueType::LoadAddress);
+  m_value.GetScalar() = parent->GetAddressOf();
+  auto type_system_or_err =
+      target_sp->GetScratchTypeSystemForLanguage(eLanguageTypeC_plus_plus);
+  if (type_system_or_err) {
+    m_value.SetCompilerType(
+        (*type_system_or_err)->GetBasicTypeFromAST(eBasicTypeUnsignedLong));
+  } else {
+    consumeError(type_system_or_err.takeError());
+  }
+  SetValueDidChange(true);
+  SetValueIsValid(true);
+  return true;
+}
+
+CompilerType ValueObjectVTable::GetCompilerTypeImpl() { return CompilerType(); }
+
+ValueObjectVTable::~ValueObjectVTable() = default;
diff --git a/lldb/source/DataFormatters/CXXFunctionPointer.cpp b/lldb/source/DataFormatters/CXXFunctionPointer.cpp
index d7df280e56efb0..6543433d17ff45 100644
--- a/lldb/source/DataFormatters/CXXFunctionPointer.cpp
+++ b/lldb/source/DataFormatters/CXXFunctionPointer.cpp
@@ -13,6 +13,7 @@
 #include "lldb/Target/SectionLoadList.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/Stream.h"
+#include "lldb/lldb-enumerations.h"
 
 #include <string>
 
@@ -76,7 +77,10 @@ bool lldb_private::formatters::CXXFunctionPointerSummaryProvider(
     }
   }
   if (sstr.GetSize() > 0) {
-    stream.Printf("(%s)", sstr.GetData());
+    if (valobj.GetValueType() == lldb::eValueTypeVTableEntry)
+      stream.PutCString(sstr.GetData());
+    else
+      stream.Printf("(%s)", sstr.GetData());
     return true;
   } else
     return false;
diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
index 8b8d330799cb6c..f4537b4133b93e 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
@@ -45,6 +45,7 @@
 #include "LibCxxVariant.h"
 #include "LibStdcpp.h"
 #include "MSVCUndecoratedNameParser.h"
+#include "lldb/lldb-enumerations.h"
 
 using namespace lldb;
 using namespace lldb_private;
@@ -1415,7 +1416,8 @@ CPlusPlusLanguage::GetHardcodedSummaries() {
             lldb_private::formatters::CXXFunctionPointerSummaryProvider,
             "Function pointer summary provider"));
         if (CompilerType CT = valobj.GetCompilerType();
-            CT.IsFunctionPointerType() || CT.IsMemberFunctionPointerType()) {
+            CT.IsFunctionPointerType() || CT.IsMemberFunctionPointerType() ||
+            valobj.GetValueType() == lldb::eValueTypeVTableEntry) {
           return formatter_sp;
         }
         return nullptr;
       }
diff --git a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp
index 53e856bf3514e0..17c8b43578691c 100644
--- a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp
@@ -54,134 +54,228 @@ bool ItaniumABILanguageRuntime::CouldHaveDynamicValue(ValueObject &in_value) {
                                                      check_objc);
 }
 
-TypeAndOrName ItaniumABILanguageRuntime::GetTypeInfoFromVTableAddress(
-    ValueObject &in_value, lldb::addr_t original_ptr,
-    lldb::addr_t vtable_load_addr) {
-  if (m_process && vtable_load_addr != LLDB_INVALID_ADDRESS) {
-    // Find the symbol that contains the "vtable_load_addr" address
-    Address vtable_addr;
-    Target &target = m_process->GetTarget();
-    if (!target.GetSectionLoadList().IsEmpty()) {
-      if (target.GetSectionLoadList().ResolveLoadAddress(vtable_load_addr,
-                                                         vtable_addr)) {
-        // See if we have cached info for this type already
-        TypeAndOrName type_info = GetDynamicTypeInfo(vtable_addr);
-        if (type_info)
-          return type_info;
-
-        SymbolContext sc;
-        target.GetImages().ResolveSymbolContextForAddress(
-            vtable_addr, eSymbolContextSymbol, sc);
-        Symbol *symbol = sc.symbol;
-        if (symbol != nullptr) {
-          const char *name =
-              symbol->GetMangled().GetDemangledName().AsCString();
-          if (name && strstr(name, vtable_demangled_prefix) == name) {
-            Log *log = GetLog(LLDBLog::Object);
-            LLDB_LOGF(log,
-                      "0x%16.16" PRIx64
-                      ": static-type = '%s' has vtable symbol '%s'\n",
-                      original_ptr, in_value.GetTypeName().GetCString(), name);
-            // We are a C++ class, that's good. Get the class name and look it
-            // up:
-            const char *class_name = name + strlen(vtable_demangled_prefix);
-            // We know the class name is absolute, so tell FindTypes that by
-            // prefixing it with the root namespace:
-            std::string lookup_name("::");
-            lookup_name.append(class_name);
-
-            type_info.SetName(class_name);
-            const bool exact_match = true;
-            TypeList class_types;
-
-            // First look in the module that the vtable symbol came from and
-            // look for a single exact match.
-            llvm::DenseSet<SymbolFile *> searched_symbol_files;
-            if (sc.module_sp)
-              sc.module_sp->FindTypes(ConstString(lookup_name), exact_match, 1,
+TypeAndOrName ItaniumABILanguageRuntime::GetTypeInfo(
+    ValueObject &in_value, const VTableInfo &vtable_info) {
+  if (vtable_info.addr.IsSectionOffset()) {
+    // See if we have cached info for this type already
+    TypeAndOrName type_info = GetDynamicTypeInfo(vtable_info.addr);
+    if (type_info)
+      return type_info;
+
+    if (vtable_info.symbol) {
+      Log *log = GetLog(LLDBLog::Object);
+      llvm::StringRef symbol_name =
+          vtable_info.symbol->GetMangled().GetDemangledName().GetStringRef();
+      LLDB_LOGF(log,
+                "0x%16.16" PRIx64
+                ": static-type = '%s' has vtable symbol '%s'\n",
+                in_value.GetPointerValue(),
+                in_value.GetTypeName().GetCString(),
+                symbol_name.str().c_str());
+      // We are a C++ class, that's good. Get the class name and look it
+      // up:
+      llvm::StringRef class_name = symbol_name;
+      class_name.consume_front(vtable_demangled_prefix);
+      // We know the class name is absolute, so tell FindTypes that by
+      // prefixing it with the root namespace:
+      std::string lookup_name("::");
+      lookup_name.append(class_name.data(), class_name.size());
+
+      type_info.SetName(class_name);
+      const bool exact_match = true;
+      TypeList class_types;
+
+      // First look in the module that the vtable symbol came from and
+      // look for a single exact match.
+      llvm::DenseSet<SymbolFile *> searched_symbol_files;
+      ModuleSP module_sp = vtable_info.symbol->CalculateSymbolContextModule();
+      if (module_sp)
+        module_sp->FindTypes(ConstString(lookup_name), exact_match, 1,
+                             searched_symbol_files, class_types);
+
+      // If we didn't find a symbol, then move on to the entire module
+      // list in the target and get as many unique matches as possible
+      Target &target = m_process->GetTarget();
+      if (class_types.Empty())
+        target.GetImages().FindTypes(nullptr, ConstString(lookup_name),
+                                     exact_match, UINT32_MAX,
                                      searched_symbol_files, class_types);
 
-            // If we didn't find a symbol, then move on to the entire module
-            // list in the target and get as many unique matches as possible
-            if (class_types.Empty())
-              target.GetImages().FindTypes(nullptr, ConstString(lookup_name),
-                                           exact_match, UINT32_MAX,
-                                           searched_symbol_files, class_types);
-
-            lldb::TypeSP type_sp;
-            if (class_types.Empty()) {
-              LLDB_LOGF(log, "0x%16.16" PRIx64 ": is not dynamic\n",
-                        original_ptr);
-              return TypeAndOrName();
+      lldb::TypeSP type_sp;
+      if (class_types.Empty()) {
+        LLDB_LOGF(log, "0x%16.16" PRIx64 ": is not dynamic\n",
+                  in_value.GetPointerValue());
+        return TypeAndOrName();
+      }
+      if (class_types.GetSize() == 1) {
+        type_sp = class_types.GetTypeAtIndex(0);
+        if (type_sp) {
+          if (TypeSystemClang::IsCXXClassType(
+                  type_sp->GetForwardCompilerType())) {
+            LLDB_LOGF(
+                log,
+                "0x%16.16" PRIx64
+                ": static-type = '%s' has dynamic type: uid={0x%" PRIx64
+                "}, type-name='%s'\n",
+                in_value.GetPointerValue(), in_value.GetTypeName().AsCString(),
+                type_sp->GetID(), type_sp->GetName().GetCString());
+            type_info.SetTypeSP(type_sp);
+          }
+        }
+      } else {
+        size_t i;
+        if (log) {
+          for (i = 0; i < class_types.GetSize(); i++) {
+            type_sp = class_types.GetTypeAtIndex(i);
+            if (type_sp) {
+              LLDB_LOGF(
+                  log,
+                  "0x%16.16" PRIx64
+                  ": static-type = '%s' has multiple matching dynamic "
+                  "types: uid={0x%" PRIx64 "}, type-name='%s'\n",
+                  in_value.GetPointerValue(),
+                  in_value.GetTypeName().AsCString(),
+                  type_sp->GetID(), type_sp->GetName().GetCString());
             }
-            if (class_types.GetSize() == 1) {
-              type_sp = class_types.GetTypeAtIndex(0);
-              if (type_sp) {
-                if (TypeSystemClang::IsCXXClassType(
-                        type_sp->GetForwardCompilerType())) {
-                  LLDB_LOGF(
-                      log,
-                      "0x%16.16" PRIx64
-                      ": static-type = '%s' has dynamic type: uid={0x%" PRIx64
-                      "}, type-name='%s'\n",
-                      original_ptr, in_value.GetTypeName().AsCString(),
-                      type_sp->GetID(), type_sp->GetName().GetCString());
-                  type_info.SetTypeSP(type_sp);
-                }
-              }
-            } else {
-              size_t i;
-              if (log) {
-                for (i = 0; i < class_types.GetSize(); i++) {
-                  type_sp = class_types.GetTypeAtIndex(i);
-                  if (type_sp) {
-                    LLDB_LOGF(
-                        log,
-                        "0x%16.16" PRIx64
-                        ": static-type = '%s' has multiple matching dynamic "
-                        "types: uid={0x%" PRIx64 "}, type-name='%s'\n",
-                        original_ptr, in_value.GetTypeName().AsCString(),
-                        type_sp->GetID(), type_sp->GetName().GetCString());
-                  }
-                }
-              }
-
-              for (i = 0; i < class_types.GetSize(); i++) {
-                type_sp = class_types.GetTypeAtIndex(i);
-                if (type_sp) {
-                  if (TypeSystemClang::IsCXXClassType(
-                          type_sp->GetForwardCompilerType())) {
-                    LLDB_LOGF(
-                        log,
-                        "0x%16.16" PRIx64 ": static-type = '%s' has multiple "
-                        "matching dynamic types, picking "
-                        "this one: uid={0x%" PRIx64 "}, type-name='%s'\n",
-                        original_ptr, in_value.GetTypeName().AsCString(),
-                        type_sp->GetID(), type_sp->GetName().GetCString());
-                    type_info.SetTypeSP(type_sp);
-                  }
-                }
-              }
-
-              if (log) {
-                LLDB_LOGF(log,
-                          "0x%16.16" PRIx64
-                          ": static-type = '%s' has multiple matching dynamic "
-                          "types, didn't find a C++ match\n",
-                          original_ptr, in_value.GetTypeName().AsCString());
-              }
+          }
+        }
+
+        for (i = 0; i < class_types.GetSize(); i++) {
+          type_sp = class_types.GetTypeAtIndex(i);
+          if (type_sp) {
+            if (TypeSystemClang::IsCXXClassType(
+                    type_sp->GetForwardCompilerType())) {
+              LLDB_LOGF(
+                  log,
+                  "0x%16.16" PRIx64 ": static-type = '%s' has multiple "
+                  "matching dynamic types, picking "
+                  "this one: uid={0x%" PRIx64 "}, type-name='%s'\n",
+                  in_value.GetPointerValue(),
+                  in_value.GetTypeName().AsCString(),
+                  type_sp->GetID(), type_sp->GetName().GetCString());
+              type_info.SetTypeSP(type_sp);
             }
+          }
+        }
+
+        if (log) {
+          LLDB_LOGF(log,
+                    "0x%16.16" PRIx64
+                    ": static-type = '%s' has multiple matching dynamic "
+                    "types, didn't find a C++ match\n",
+                    in_value.GetPointerValue(),
+                    in_value.GetTypeName().AsCString());
+        }
       }
+      if (type_info)
+        SetDynamicTypeInfo(vtable_info.addr, type_info);
+      return type_info;
     }
   }
   return TypeAndOrName();
 }
 
+llvm::Error ItaniumABILanguageRuntime::TypeHasVTable(CompilerType type) {
+  // Check to make sure the class has a vtable.
+  CompilerType original_type = type;
+  if (type.IsPointerOrReferenceType()) {
+    CompilerType pointee_type = type.GetPointeeType();
+    if (pointee_type)
+      type = pointee_type;
+  }
+
+  // Make sure this is a class or a struct first by checking the type class
+  // bitfield that gets returned.
+  if ((type.GetTypeClass() & (eTypeClassStruct | eTypeClassClass)) == 0) {
+    return llvm::createStringError(std::errc::invalid_argument,
+        "type \"%s\" is not a class or struct or a pointer to one",
+        original_type.GetTypeName().AsCString("<invalid>"));
+  }
+
+  // Check if the type has virtual functions by asking it if it is polymorphic.
+  if (!type.IsPolymorphicClass()) {
+    return llvm::createStringError(std::errc::invalid_argument,
+        "type \"%s\" doesn't have a vtable",
+        type.GetTypeName().AsCString("<invalid>"));
+  }
+  return llvm::Error::success();
+}
+
+// This function can accept both pointers and references to classes as well
+// as instances of classes. When using it during dynamic type detection, only
+// valid ValueObjects that return true from CouldHaveDynamicValue(...) should
+// call this function, and \a check_type should be set to false. This function
+// is also used by ValueObjectVTable, which can pass in class instances; those
+// are not suitable for dynamic type detection, so such callers should pass
+// true for \a check_type.
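+//
+// A sketch of typical use (illustrative only; it mirrors the call made from
+// ValueObjectVTable::UpdateValue):
+//
+//   llvm::Expected<LanguageRuntime::VTableInfo> info_or_err =
+//       runtime->GetVTableInfo(valobj, /*check_type=*/true);
+//   if (!info_or_err)
+//     return info_or_err.takeError();
+//   Symbol *vtable_symbol = info_or_err->symbol;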
+llvm::Expected<LanguageRuntime::VTableInfo>
+ItaniumABILanguageRuntime::GetVTableInfo(ValueObject &in_value,
+                                         bool check_type) {
+
+  CompilerType type = in_value.GetCompilerType();
+  if (check_type) {
+    if (llvm::Error err = TypeHasVTable(type))
+      return std::move(err);
+  }
+  ExecutionContext exe_ctx(in_value.GetExecutionContextRef());
+  Process *process = exe_ctx.GetProcessPtr();
+  if (process == nullptr)
+    return llvm::createStringError(std::errc::invalid_argument,
+                                   "invalid process");
+
+  AddressType address_type;
+  lldb::addr_t original_ptr = LLDB_INVALID_ADDRESS;
+  if (type.IsPointerOrReferenceType())
+    original_ptr = in_value.GetPointerValue(&address_type);
+  else
+    original_ptr = in_value.GetAddressOf(/*scalar_is_load_address=*/true,
+                                         &address_type);
+  if (original_ptr == LLDB_INVALID_ADDRESS || address_type != eAddressTypeLoad)
+    return llvm::createStringError(std::errc::invalid_argument,
+                                   "failed to get the address of the value");
+
+  Status error;
+  const lldb::addr_t vtable_load_addr =
+      process->ReadPointerFromMemory(original_ptr, error);
+
+  if (!error.Success() || vtable_load_addr == LLDB_INVALID_ADDRESS)
+    return llvm::createStringError(std::errc::invalid_argument,
+        "failed to read vtable pointer from memory at 0x%" PRIx64,
+        original_ptr);
+
+  // Find the symbol that contains the "vtable_load_addr" address
+  Address vtable_addr;
+  if (!process->GetTarget().ResolveLoadAddress(vtable_load_addr, vtable_addr))
+    return llvm::createStringError(std::errc::invalid_argument,
+        "failed to resolve vtable pointer 0x%" PRIx64 " to a section",
+        vtable_load_addr);
+
+  // Check our cache first to see if we already have this info
+  {
+    std::lock_guard<std::mutex> locker(m_mutex);
+    auto pos = m_vtable_info_map.find(vtable_addr);
+    if (pos != m_vtable_info_map.end())
+      return pos->second;
+  }
+
+  Symbol *symbol = vtable_addr.CalculateSymbolContextSymbol();
+  if (symbol == nullptr)
+    return llvm::createStringError(std::errc::invalid_argument,
                                   "no symbol found for 0x%" PRIx64,
+                                   vtable_load_addr);
+  llvm::StringRef name = symbol->GetMangled().GetDemangledName().GetStringRef();
+  if (name.startswith(vtable_demangled_prefix)) {
+    VTableInfo info = {vtable_addr, symbol};
+    std::lock_guard<std::mutex> locker(m_mutex);
+    m_vtable_info_map[vtable_addr] = info;
+    return info;
+  }
+  return llvm::createStringError(std::errc::invalid_argument,
+      "symbol found that contains 0x%" PRIx64 " is not a vtable symbol",
+      vtable_load_addr);
+}
+
 bool ItaniumABILanguageRuntime::GetDynamicTypeAndAddress(
     ValueObject &in_value, lldb::DynamicValueType use_dynamic,
     TypeAndOrName &class_type_or_name, Address &dynamic_address,
@@ -198,33 +292,23 @@ bool ItaniumABILanguageRuntime::GetDynamicTypeAndAddress(
   class_type_or_name.Clear();
   value_type = Value::ValueType::Scalar;
 
-  // Only a pointer or reference type can have a different dynamic and static
-  // type:
   if (!CouldHaveDynamicValue(in_value))
     return false;
 
-  // First job, pull out the address at 0 offset from the object.
-  AddressType address_type;
-  lldb::addr_t original_ptr = in_value.GetPointerValue(&address_type);
-  if (original_ptr == LLDB_INVALID_ADDRESS)
-    return false;
-
-  ExecutionContext exe_ctx(in_value.GetExecutionContextRef());
-
-  Process *process = exe_ctx.GetProcessPtr();
-
-  if (process == nullptr)
-    return false;
-
-  Status error;
-  const lldb::addr_t vtable_address_point =
-      process->ReadPointerFromMemory(original_ptr, error);
-
-  if (!error.Success() || vtable_address_point == LLDB_INVALID_ADDRESS)
+  // Check if we have a vtable pointer in this value. If we don't it will
+  // return an error, else it will return a valid resolved address. We don't
+  // want GetVTableInfo to check the type since we accept void * as a possible
+  // dynamic type and that won't pass the type check. We already checked the
+  // type above in CouldHaveDynamicValue(...).
+  llvm::Expected<VTableInfo> vtable_info_or_err =
+      GetVTableInfo(in_value, /*check_type=*/false);
+  if (!vtable_info_or_err) {
+    llvm::consumeError(vtable_info_or_err.takeError());
     return false;
+  }
 
-  class_type_or_name = GetTypeInfoFromVTableAddress(in_value, original_ptr,
-                                                    vtable_address_point);
+  const VTableInfo &vtable_info = vtable_info_or_err.get();
+  class_type_or_name = GetTypeInfo(in_value, vtable_info);
 
   if (!class_type_or_name)
     return false;
 
@@ -244,22 +328,27 @@ bool ItaniumABILanguageRuntime::GetDynamicTypeAndAddress(
   }
 
   // The offset_to_top is two pointers above the vtable pointer.
-  const uint32_t addr_byte_size = process->GetAddressByteSize();
+  Target &target = m_process->GetTarget();
+  const addr_t vtable_load_addr = vtable_info.addr.GetLoadAddress(&target);
+  if (vtable_load_addr == LLDB_INVALID_ADDRESS)
+    return false;
+  const uint32_t addr_byte_size = m_process->GetAddressByteSize();
   const lldb::addr_t offset_to_top_location =
-      vtable_address_point - 2 * addr_byte_size;
+      vtable_load_addr - 2 * addr_byte_size;
   // Watch for underflow, offset_to_top_location should be less than
-  // vtable_address_point
-  if (offset_to_top_location >= vtable_address_point)
+  // vtable_load_addr
+  if (offset_to_top_location >= vtable_load_addr)
     return false;
-  const int64_t offset_to_top = process->ReadSignedIntegerFromMemory(
+  Status error;
+  const int64_t offset_to_top = m_process->ReadSignedIntegerFromMemory(
       offset_to_top_location, addr_byte_size, INT64_MIN, error);
 
   if (offset_to_top == INT64_MIN)
     return false;
   // So the dynamic type is a value that starts at offset_to_top above
   // the original address.
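   // As a worked example (hypothetical addresses): with multiple inheritance,
   // a secondary base subobject at 0x1010 inside a complete object at 0x1000
   // has a vtable whose offset_to_top is -0x10, so 0x1010 + (-0x10) = 0x1000
   // recovers the start of the full object. For a primary base the offset is
   // simply 0.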
-  lldb::addr_t dynamic_addr = original_ptr + offset_to_top;
-  if (!process->GetTarget().GetSectionLoadList().ResolveLoadAddress(
+  lldb::addr_t dynamic_addr = in_value.GetPointerValue() + offset_to_top;
+  if (!m_process->GetTarget().ResolveLoadAddress(
           dynamic_addr, dynamic_address)) {
     dynamic_address.SetRawAddress(dynamic_addr);
   }
@@ -582,10 +671,10 @@ ValueObjectSP ItaniumABILanguageRuntime::GetExceptionObjectForThread(
   ValueObjectSP exception = ValueObject::CreateValueObjectFromData(
       "exception", exception_isw.GetAsData(m_process->GetByteOrder()), exe_ctx,
       voidstar);
-  ValueObjectSP dyn_exception 
+  ValueObjectSP dyn_exception
       = exception->GetDynamicValue(eDynamicDontRunTarget);
   // If we succeed in making a dynamic value, return that:
-  if (dyn_exception) 
+  if (dyn_exception)
     return dyn_exception;
 
   return exception;
@@ -593,7 +682,7 @@ ValueObjectSP ItaniumABILanguageRuntime::GetExceptionObjectForThread(
 
 TypeAndOrName ItaniumABILanguageRuntime::GetDynamicTypeInfo(
     const lldb_private::Address &vtable_addr) {
-  std::lock_guard<std::mutex> locker(m_dynamic_type_map_mutex);
+  std::lock_guard<std::mutex> locker(m_mutex);
   DynamicTypeCache::const_iterator pos = m_dynamic_type_map.find(vtable_addr);
   if (pos == m_dynamic_type_map.end())
     return TypeAndOrName();
@@ -603,6 +692,6 @@ TypeAndOrName ItaniumABILanguageRuntime::GetDynamicTypeInfo(
 
 void ItaniumABILanguageRuntime::SetDynamicTypeInfo(
     const lldb_private::Address &vtable_addr, const TypeAndOrName &type_info) {
-  std::lock_guard<std::mutex> locker(m_dynamic_type_map_mutex);
+  std::lock_guard<std::mutex> locker(m_mutex);
   m_dynamic_type_map[vtable_addr] = type_info;
 }
diff --git a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h
index ca8d5ab1a93a1b..0f7e73cfee0754 100644
--- a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h
+++ b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h
@@ -47,6 +47,10 @@ class ItaniumABILanguageRuntime : public lldb_private::CPPLanguageRuntime {
     return runtime->isA(&ID);
   }
 
+  llvm::Expected<VTableInfo>
+  GetVTableInfo(ValueObject &in_value, bool check_type) override;
+
   bool GetDynamicTypeAndAddress(ValueObject &in_value,
                                 lldb::DynamicValueType use_dynamic,
                                 TypeAndOrName &class_type_or_name,
@@ -71,7 +75,7 @@ class ItaniumABILanguageRuntime : public lldb_private::CPPLanguageRuntime {
                                           bool catch_bp, bool throw_bp) override;
 
   lldb::SearchFilterSP CreateExceptionSearchFilter() override;
-  
+
   lldb::ValueObjectSP GetExceptionObjectForThread(
       lldb::ThreadSP thread_sp) override;
 
@@ -89,24 +93,33 @@ class ItaniumABILanguageRuntime : public lldb_private::CPPLanguageRuntime {
 
 private:
   typedef std::map<lldb_private::Address, TypeAndOrName> DynamicTypeCache;
+  typedef std::map<lldb_private::Address, VTableInfo> VTableInfoCache;
 
   ItaniumABILanguageRuntime(Process *process)
       : // Call CreateInstance instead.
-        lldb_private::CPPLanguageRuntime(process), m_cxx_exception_bp_sp(),
-        m_dynamic_type_map(), m_dynamic_type_map_mutex() {}
+        lldb_private::CPPLanguageRuntime(process) {}
 
   lldb::BreakpointSP m_cxx_exception_bp_sp;
   DynamicTypeCache m_dynamic_type_map;
-  std::mutex m_dynamic_type_map_mutex;
+  VTableInfoCache m_vtable_info_map;
+  std::mutex m_mutex;
 
-  TypeAndOrName GetTypeInfoFromVTableAddress(ValueObject &in_value,
-                                             lldb::addr_t original_ptr,
-                                             lldb::addr_t vtable_addr);
+  TypeAndOrName GetTypeInfo(ValueObject &in_value,
+                            const VTableInfo &vtable_info);
 
   TypeAndOrName GetDynamicTypeInfo(const lldb_private::Address &vtable_addr);
   void SetDynamicTypeInfo(const lldb_private::Address &vtable_addr,
                           const TypeAndOrName &type_info);
+
+  // Check if a compiler type has a vtable.
+  //
+  // If the compiler type is a pointer or a reference, this function will check
+  // if the pointee type has a vtable, else it will check the type passed in.
+  //
+  // Returns an error, with an explanation, if the type doesn't have a vtable,
+  // or returns Error::success() if the type has a vtable.
+  llvm::Error TypeHasVTable(CompilerType compiler_type);
 };
 
 } // namespace lldb_private
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index df06ba0ed952af..f037708efc3800 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -3557,8 +3557,15 @@ bool TypeSystemClang::IsPolymorphicClass(lldb::opaque_compiler_type_t type) {
     if (record_decl) {
       const clang::CXXRecordDecl *cxx_record_decl =
          llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
-      if (cxx_record_decl)
-        return cxx_record_decl->isPolymorphic();
+      if (cxx_record_decl) {
+        // We can't just call isPolymorphic() here because that just
+        // means the current class has virtual functions; it doesn't check
+        // if any inherited classes have virtual functions. The doc string
+        // in SBType::IsPolymorphicClass() says it is looking for both
+        // whether the class has virtual methods or whether any bases do,
+        // so this should be more correct.
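+        // For example (illustrative): "struct D : virtual B {};" is a
+        // dynamic class (it needs a vtable for its virtual base) even if
+        // neither D nor B declares a virtual function, so isDynamicClass()
+        // also covers cases isPolymorphic() alone would miss.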
+        return cxx_record_decl->isDynamicClass();
+      }
     }
   } break;
@@ -4708,6 +4715,21 @@ TypeSystemClang::GetTypedefedType(lldb::opaque_compiler_type_t type) {
 CompilerType TypeSystemClang::GetBasicTypeFromAST(lldb::BasicType basic_type) {
   return TypeSystemClang::GetBasicType(basic_type);
 }
+
+CompilerType TypeSystemClang::CreateGenericFunctionPrototype() {
+  clang::ASTContext &ast = getASTContext();
+  const FunctionType::ExtInfo generic_ext_info(
+      /*noReturn=*/false,
+      /*hasRegParm=*/false,
+      /*regParm=*/0,
+      CallingConv::CC_C,
+      /*producesResult=*/false,
+      /*noCallerSavedRegs=*/false,
+      /*NoCfCheck=*/false,
+      /*cmseNSCall=*/false);
+  QualType func_type = ast.getFunctionNoProtoType(ast.VoidTy, generic_ext_info);
+  return GetType(func_type);
+}
 
 // Exploring the type
 
 const llvm::fltSemantics &
@@ -4824,7 +4846,7 @@ lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type,
 
   case clang::Type::FunctionNoProto:
   case clang::Type::FunctionProto:
-    break;
+    return lldb::eEncodingUint;
 
   case clang::Type::IncompleteArray:
   case clang::Type::VariableArray:
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
index 66e59ec985fb89..0ec2d026e99610 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
@@ -802,6 +802,10 @@ class TypeSystemClang : public TypeSystem {
   // Create related types using the current type's AST
   CompilerType GetBasicTypeFromAST(lldb::BasicType basic_type) override;
 
+  // Create a generic function prototype that can be used in ValueObject
+  // types to correctly display a function pointer with the right value and
+  // summary.
+  CompilerType CreateGenericFunctionPrototype() override;
+
   // Exploring the type
 
   const llvm::fltSemantics &GetFloatTypeSemantics(size_t byte_size) override;
diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp
index 006a843bacd2e1..54eeace93b9648 100644
--- a/lldb/source/Symbol/Type.cpp
+++ b/lldb/source/Symbol/Type.cpp
@@ -748,6 +748,10 @@ void TypeAndOrName::SetName(const char *type_name_cstr) {
   m_type_name.SetCString(type_name_cstr);
 }
 
+void TypeAndOrName::SetName(llvm::StringRef type_name) {
+  m_type_name.SetString(type_name);
+}
+
 void TypeAndOrName::SetTypeSP(lldb::TypeSP type_sp) {
   if (type_sp) {
     m_compiler_type = type_sp->GetForwardCompilerType();
diff --git a/lldb/test/API/functionalities/vtable/Makefile b/lldb/test/API/functionalities/vtable/Makefile
new file mode 100644
index 00000000000000..99998b20bcb050
--- /dev/null
+++ b/lldb/test/API/functionalities/vtable/Makefile
@@ -0,0 +1,3 @@
+CXX_SOURCES := main.cpp
+
+include Makefile.rules
diff --git a/lldb/test/API/functionalities/vtable/TestVTableValue.py b/lldb/test/API/functionalities/vtable/TestVTableValue.py
new file mode 100644
index 00000000000000..5b243e0646f4c2
--- /dev/null
+++ b/lldb/test/API/functionalities/vtable/TestVTableValue.py
@@ -0,0 +1,186 @@
+"""
+Make sure getting the vtable of a value works and doesn't crash.
+"""
+
+
+import lldb
+import lldbsuite.test.lldbutil as lldbutil
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+
+class TestVTableValue(TestBase):
+    # If your test case doesn't stress debug info, then
+    # set this to true. That way it won't be run once for
+    # each debug info format.
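+    # (This test exercises the new SBValue.GetVTable() API; a minimal use,
+    # illustrative only, looks like:
+    #   vtable = frame.FindVariable("shape").GetVTable()
+    #   for entry in vtable.children:
+    #       print(entry)
+    # where each child renders like
+    # "(double ()) [0] = 0x... a.out`Shape::Area() ...".)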
+ NO_DEBUG_INFO_TESTCASE = True + + @skipUnlessPlatform(["linux", "macosx"]) + def test_vtable(self): + self.build() + lldbutil.run_to_source_breakpoint( + self, "At the end", lldb.SBFileSpec("main.cpp") + ) + + # Test a shape instance to make sure we get the vtable correctly. + shape = self.frame().FindVariable("shape") + vtable = shape.GetVTable() + self.assertEquals(vtable.GetName(), "vtable for Shape") + self.assertEquals(vtable.GetTypeName(), "vtable for Shape") + # Make sure we have the right number of virtual functions in our vtable + # for the shape class. + self.assertEquals(vtable.GetNumChildren(), 4) + + # Verify vtable address + vtable_addr = vtable.GetValueAsUnsigned(0) + expected_addr = self.expected_vtable_addr(shape) + self.assertEquals(vtable_addr, expected_addr) + + for (idx, vtable_entry) in enumerate(vtable.children): + self.verify_vtable_entry(vtable_entry, vtable_addr, idx) + + # Test a shape reference to make sure we get the vtable correctly. + shape = self.frame().FindVariable("shape_ref") + vtable = shape.GetVTable() + self.assertEquals(vtable.GetName(), "vtable for Shape") + self.assertEquals(vtable.GetTypeName(), "vtable for Shape") + # Make sure we have the right number of virtual functions in our vtable + # for the shape class. + self.assertEquals(vtable.GetNumChildren(), 4) + + # Verify vtable address + vtable_addr = vtable.GetValueAsUnsigned(0) + expected_addr = self.expected_vtable_addr(shape) + self.assertEquals(vtable_addr, expected_addr) + + for (idx, vtable_entry) in enumerate(vtable.children): + self.verify_vtable_entry(vtable_entry, vtable_addr, idx) + + + # Test we get the right vtable for the Rectangle instance. + rect = self.frame().FindVariable("rect") + vtable = rect.GetVTable() + self.assertEquals(vtable.GetName(), "vtable for Rectangle") + self.assertEquals(vtable.GetTypeName(), "vtable for Rectangle") + + # Make sure we have the right number of virtual functions in our vtable + # with the extra virtual function added by the Rectangle class + self.assertEquals(vtable.GetNumChildren(), 5) + + # Verify vtable address + vtable_addr = vtable.GetValueAsUnsigned() + expected_addr = self.expected_vtable_addr(rect) + self.assertEquals(vtable_addr, expected_addr) + + for (idx, vtable_entry) in enumerate(vtable.children): + self.verify_vtable_entry(vtable_entry, vtable_addr, idx) + + @skipUnlessPlatform(["linux", "macosx"]) + def test_base_class_ptr(self): + self.build() + (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint( + self, "Shape is Rectangle", lldb.SBFileSpec("main.cpp") + ) + + shape = self.frame().FindVariable("shape") + rect = self.frame().FindVariable("rect") + + shape_ptr = self.frame().FindVariable("shape_ptr") + shape_ptr_vtable = shape_ptr.GetVTable() + self.assertEquals(shape_ptr_vtable.GetName(), "vtable for Rectangle") + self.assertEquals(shape_ptr_vtable.GetNumChildren(), 5) + self.assertEquals(shape_ptr.GetValueAsUnsigned(0), + rect.GetLoadAddress()) + lldbutil.continue_to_source_breakpoint( + self, process, "Shape is Shape", lldb.SBFileSpec("main.cpp") + ) + self.assertEquals(shape_ptr.GetValueAsUnsigned(0), + shape.GetLoadAddress()) + self.assertEquals(shape_ptr_vtable.GetNumChildren(), 4) + self.assertEquals(shape_ptr_vtable.GetName(), "vtable for Shape") + + @skipUnlessPlatform(["linux", "macosx"]) + def test_no_vtable(self): + self.build() + lldbutil.run_to_source_breakpoint( + self, "At the end", lldb.SBFileSpec("main.cpp") + ) + + var = self.frame().FindVariable("not_virtual") + 
self.assertEqual(var.GetVTable().GetError().GetCString(),
+                         'type "NotVirtual" doesn\'t have a vtable')
+
+        var = self.frame().FindVariable("argc")
+        self.assertEqual(var.GetVTable().GetError().GetCString(),
+                         'no language runtime support for the language "c"')
+
+    @skipUnlessPlatform(["linux", "macosx"])
+    def test_overwrite_vtable(self):
+        self.build()
+        (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(
+            self, "At the end", lldb.SBFileSpec("main.cpp")
+        )
+
+        # Test a shape instance to make sure we get the vtable correctly.
+        shape = self.frame().FindVariable("shape")
+        vtable = shape.GetVTable()
+        self.assertEquals(vtable.GetName(), "vtable for Shape")
+        self.assertEquals(vtable.GetTypeName(), "vtable for Shape")
+        # Make sure we have the right number of virtual functions in our vtable
+        # for the shape class.
+        self.assertEquals(vtable.GetNumChildren(), 4)
+
+        # Overwrite the first entry in the vtable and make sure we can still
+        # see the bogus value which should have no summary
+        vtable_addr = vtable.GetValueAsUnsigned()
+        data = str("\x01\x01\x01\x01\x01\x01\x01\x01")
+        error = lldb.SBError()
+        process.WriteMemory(vtable_addr, data, error)
+
+        scribbled_child = vtable.GetChildAtIndex(0)
+        self.assertEquals(scribbled_child.GetValueAsUnsigned(0),
+                          0x0101010101010101)
+        self.assertEquals(scribbled_child.GetSummary(), None)
+
+    def expected_vtable_addr(self, var: lldb.SBValue) -> int:
+        load_addr = var.GetLoadAddress()
+        read_from_memory_error = lldb.SBError()
+        vtable_addr = self.process().ReadPointerFromMemory(
+            load_addr, read_from_memory_error
+        )
+        self.assertTrue(read_from_memory_error.Success())
+        return vtable_addr
+
+    def expected_vtable_entry_func_ptr(self, vtable_addr: int, idx: int):
+        vtable_entry_addr = vtable_addr + idx * self.process().GetAddressByteSize()
+        read_func_ptr_error = lldb.SBError()
+        func_ptr = self.process().ReadPointerFromMemory(vtable_entry_addr,
+                                                        read_func_ptr_error)
+        self.assertTrue(read_func_ptr_error.Success())
+        return func_ptr
+
+    def verify_vtable_entry(self, vtable_entry: lldb.SBValue, vtable_addr: int,
+                            idx: int):
+        """Verify the vtable entry looks something like:
+
+        (double ()) [0] = 0x0000000100003a10 a.out`Rectangle::Area() at main.cpp:14
+
+        """
+        # Check function ptr
+        vtable_entry_func_ptr = vtable_entry.GetValueAsUnsigned(0)
+        self.assertEquals(
+            vtable_entry_func_ptr,
+            self.expected_vtable_entry_func_ptr(vtable_addr, idx),
+        )
+
+        sb_addr = self.target().ResolveLoadAddress(vtable_entry_func_ptr)
+        sym_ctx = sb_addr.GetSymbolContext(lldb.eSymbolContextEverything)
+
+        # Make sure the type is the same as the function type
+        func_type = sym_ctx.GetFunction().GetType()
+        if func_type.IsValid():
+            self.assertEquals(vtable_entry.GetType(),
+                              func_type.GetPointerType())
+
+        # The summary should be the address description of the function pointer
+        summary = vtable_entry.GetSummary()
+        self.assertEquals(str(sb_addr), summary)
diff --git a/lldb/test/API/functionalities/vtable/main.cpp b/lldb/test/API/functionalities/vtable/main.cpp
new file mode 100644
index 00000000000000..498a5765a3f6ff
--- /dev/null
+++ b/lldb/test/API/functionalities/vtable/main.cpp
@@ -0,0 +1,38 @@
+class Shape {
+public:
+  virtual double Area() { return 1.0; }
+  virtual double Perimeter() { return 1.0; }
+  // Note that destructors generate two entries in the vtable: base object
+  // destructor and deleting destructor.
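+  // For example, under the Itanium C++ ABI ~Shape() contributes both a
+  // "complete object destructor" (D1) and a "deleting destructor" (D0)
+  // entry, which is why the tests expect 4 vtable entries for Shape (Area,
+  // Perimeter, and the two destructor entries) rather than 3.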
+  virtual ~Shape() = default;
+};
+
+class Rectangle : public Shape {
+public:
+  ~Rectangle() override = default;
+  double Area() override { return 2.0; }
+  double Perimeter() override { return 2.0; }
+  virtual void RectangleOnly() {}
+  // This *shouldn't* show up in the vtable.
+  void RectangleSpecific() { return; }
+};
+
+// Make a class that looks like it would be virtual because the first ivar is
+// a virtual class and if we inspect memory at the address of this class it
+// would appear to be a virtual class. We need to make sure we don't get a
+// valid vtable from this object.
+class NotVirtual {
+  Rectangle m_rect;
+public:
+  NotVirtual() = default;
+};
+
+int main(int argc, const char **argv) {
+  Shape shape;
+  Rectangle rect;
+  Shape *shape_ptr = &rect;
+  Shape &shape_ref = shape;
+  shape_ptr = &shape;     // Shape is Rectangle
+  NotVirtual not_virtual; // Shape is Shape
+  return 0;               // At the end
+}

From ba67365abd26c703f72b0260fa70408d3fa6a870 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Tue, 31 Oct 2023 00:46:41 +0000
Subject: [PATCH 106/144] [gn build] Port 7fbd427f5ebe

---
 llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn
index 297f9857cec054..30a9fb3ecceaa0 100644
--- a/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn
@@ -92,6 +92,7 @@ static_library("Core") {
     "ValueObjectRegister.cpp",
     "ValueObjectSyntheticFilter.cpp",
     "ValueObjectUpdater.cpp",
+    "ValueObjectVTable.cpp",
    "ValueObjectVariable.cpp",
   ]
 }

From 3343bd90136ff49cf7eeb6ff8a5c0cd8dbceab55 Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Mon, 30 Oct 2023 21:10:32 -0400
Subject: [PATCH 107/144] [gn] port 15b37e1cfa5f (no xf on msvc/android)

---
 .../compiler-rt/lib/builtins/BUILD.gn         | 65 ++++++++++++-------
 1 file changed, 43 insertions(+), 22 deletions(-)

diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn
index 3a19729bb8dcf3..800a647ad64483 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn
@@ -8,6 +8,11 @@ declare_args() {
 
 lse_targets = []
 
+if (current_cpu == "x86" || current_cpu == "x64") {
+  # long double is not 80 bits on Android or MSVC.
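+  # (MSVC, for example, defines long double as a 64-bit double.) When long
+  # double is not the 80-bit x87 format, the "xf" helper sources guarded
+  # below (fixxfdi.c, floatdixf.c, ...) have nothing to lower and are
+  # excluded.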
+ long_double_is_80_bits = current_os != "android" && current_os != "win" +} + if (current_cpu == "arm64") { foreach(pat, [ @@ -270,24 +275,28 @@ static_library("builtins") { sources -= [ "fp_mode.c" ] sources += [ "cpu_model.c", - "divxc3.c", - "extendxftf2.c", - "fixunsxfdi.c", - "fixunsxfsi.c", - "fixunsxfti.c", - "fixxfdi.c", - "fixxfti.c", - "floatdixf.c", - "floattixf.c", - "floatundixf.c", - "floatuntixf.c", "i386/fp_mode.c", - "mulxc3.c", - "powixf2.c", "truncdfbf2.c", "truncsfbf2.c", - "trunctfxf2.c", ] + if (long_double_is_80_bits) { + sources += [ + "divxc3.c", + "extendxftf2.c", + "fixunsxfdi.c", + "fixunsxfsi.c", + "fixunsxfti.c", + "fixxfdi.c", + "fixxfti.c", + "floatdixf.c", + "floattixf.c", + "floatundixf.c", + "floatuntixf.c", + "mulxc3.c", + "powixf2.c", + "trunctfxf2.c", + ] + } } if (current_cpu == "x86") { sources -= [ @@ -296,10 +305,8 @@ static_library("builtins") { "divdi3.c", "floatdidf.c", "floatdisf.c", - "floatdixf.c", "floatundidf.c", "floatundisf.c", - "floatundixf.c", "lshrdi3.c", "moddi3.c", "muldi3.c", @@ -312,16 +319,24 @@ static_library("builtins") { "i386/divdi3.S", "i386/floatdidf.S", "i386/floatdisf.S", - "i386/floatdixf.S", "i386/floatundidf.S", "i386/floatundisf.S", - "i386/floatundixf.S", "i386/lshrdi3.S", "i386/moddi3.S", "i386/muldi3.S", "i386/udivdi3.S", "i386/umoddi3.S", ] + if (long_double_is_80_bits) { + sources -= [ + "floatdixf.c", + "floatundixf.c", + ] + sources += [ + "i386/floatdixf.S", + "i386/floatundixf.S", + ] + } if (current_os == "win") { sources += [ "i386/chkstk.S" ] } @@ -329,19 +344,25 @@ static_library("builtins") { sources -= [ "floatdidf.c", "floatdisf.c", - "floatdixf.c", "floatundidf.c", "floatundisf.c", - "floatundixf.c", ] sources += [ "x86_64/floatdidf.c", "x86_64/floatdisf.c", - "x86_64/floatdixf.c", "x86_64/floatundidf.S", "x86_64/floatundisf.S", - "x86_64/floatundixf.S", ] + if (long_double_is_80_bits) { + sources -= [ + "floatdixf.c", + "floatundixf.c", + ] + sources += [ + "x86_64/floatdixf.c", + "x86_64/floatundixf.S", + ] + } if (current_os == "win") { sources += [ "x86_64/chkstk.S" ] } From b0e00ca6a605b88e83129c8c6be4e177f93cbfea Mon Sep 17 00:00:00 2001 From: Maksim Levental Date: Mon, 30 Oct 2023 20:22:27 -0500 Subject: [PATCH 108/144] [mlir][python] fix `replace=True` for `register_operation` and `register_type_caster` (#70264)